Repository: HansKristian-Work/dxil-spirv
Branch: master
Commit: 62dbb07f7715
Files: 2195
Total size: 9.0 MB

Directory structure:
gitextract_06b8vcw_/
├── .clang-format
├── .gitattributes
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── DESCRIPTORS.md
├── LICENSE.MIT
├── README.md
├── bc/
│   ├── CMakeLists.txt
│   ├── cast.hpp
│   ├── context.cpp
│   ├── context.hpp
│   ├── data_structures.hpp
│   ├── disassembler.cpp
│   ├── function.cpp
│   ├── function.hpp
│   ├── instruction.cpp
│   ├── instruction.hpp
│   ├── iterator.hpp
│   ├── metadata.cpp
│   ├── metadata.hpp
│   ├── module.cpp
│   ├── module.hpp
│   ├── module_dxbc_ir.cpp
│   ├── type.cpp
│   ├── type.hpp
│   ├── value.cpp
│   └── value.hpp
├── build_dxc.sh
├── cfg_structurizer.cpp
├── cfg_structurizer.hpp
├── checkout_dxc.sh
├── checkout_llvm.sh
├── copy_reference_shaders.py
├── debug/
│   ├── logging.cpp
│   └── logging.hpp
├── descriptor_qa.cpp
├── descriptor_qa.hpp
├── dxbc_spirv_sandbox.cpp
├── dxil-disasm.py
├── dxil.hpp
├── dxil_converter.cpp
├── dxil_converter.hpp
├── dxil_extract.cpp
├── dxil_parser.cpp
├── dxil_parser.hpp
├── dxil_spirv.cpp
├── dxil_spirv_c.cpp
├── dxil_spirv_c.h
├── external/
│   └── CMakeLists.txt
├── format_all.sh
├── ir.hpp
├── link.T
├── llvm_bitcode_parser.cpp
├── llvm_bitcode_parser.hpp
├── memory_stream.cpp
├── memory_stream.hpp
├── meson.build
├── misc/
│   └── structurize_test.cpp
├── node.cpp
├── node.hpp
├── node_pool.cpp
├── node_pool.hpp
├── opcodes/
│   ├── converter_impl.hpp
│   ├── dxil/
│   │   ├── dxil_ags.cpp
│   │   ├── dxil_ags.hpp
│   │   ├── dxil_arithmetic.cpp
│   │   ├── dxil_arithmetic.hpp
│   │   ├── dxil_buffer.cpp
│   │   ├── dxil_buffer.hpp
│   │   ├── dxil_common.cpp
│   │   ├── dxil_common.hpp
│   │   ├── dxil_compute.cpp
│   │   ├── dxil_compute.hpp
│   │   ├── dxil_geometry.cpp
│   │   ├── dxil_geometry.hpp
│   │   ├── dxil_mesh.cpp
│   │   ├── dxil_mesh.hpp
│   │   ├── dxil_nvapi.cpp
│   │   ├── dxil_nvapi.hpp
│   │   ├── dxil_pixel_ops.cpp
│   │   ├── dxil_pixel_ops.hpp
│   │   ├── dxil_ray_tracing.cpp
│   │   ├── dxil_ray_tracing.hpp
│   │   ├── dxil_resources.cpp
│   │   ├── dxil_resources.hpp
│   │   ├── dxil_sampling.cpp
│   │   ├── dxil_sampling.hpp
│   │   ├── dxil_tessellation.cpp
│   │   ├── dxil_tessellation.hpp
│   │   ├── dxil_waveops.cpp
│   │   ├── dxil_waveops.hpp
│   │   ├── dxil_workgraph.cpp
│   │   └── dxil_workgraph.hpp
│   ├── opcodes.hpp
│   ├── opcodes_dxil_builtins.cpp
│   ├── opcodes_dxil_builtins.hpp
│   ├── opcodes_llvm_builtins.cpp
│   └── opcodes_llvm_builtins.hpp
├── pkg-config/
│   └── dxil-spirv-c-shared.pc.in
├── reference/
│   └── shaders/
│       ├── ags/
│       │   ├── ags.ssbo.comp
│       │   ├── cs_constexpr_wmma_gep.sm66.full-wmma.ssbo.comp
│       │   ├── cs_constexpr_wmma_gep.sm66.ssbo.comp
│       │   ├── cs_wmma_alloca.sm66.ssbo.comp
│       │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.comp
│       │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_extract_insert.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_at.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_bt.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ct.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ot.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_fp8.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided_transpose.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f32.sm66.ssbo.comp
│       │   ├── cs_wmma_fp16_fp8_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_fp32_fp16_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_fp32_fp8_conversions.sm66.ssbo.nv-coopmat2.comp
│       │   ├── cs_wmma_fp8_fp32_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_lds_transpose.sm66.ssbo.comp
│       │   ├── cs_wmma_matrix_length.sm66.ssbo.comp
│       │   ├── cs_wmma_store_phi.full-wmma.sm66.ssbo.comp
│       │   └── cs_wmma_store_phi.sm66.ssbo.comp
│       ├── alloca-opts/
│       │   ├── bad-stride.frag
│       │   ├── double-array-load.frag
│       │   ├── float4-array-load.bindless.frag
│       │   ├── float4-array-load.bindless.root-constants.frag
│       │   ├── float4-array-load.frag
│       │   ├── float4-array-load.root-constant.frag
│       │   ├── float4-array-load.root-descriptor.frag
│       │   ├── float4-array-load.root-descriptor.root-constants.frag
│       │   ├── load-different.frag
│       │   ├── local-root-constants.local-root-signature.rgen
│       │   ├── matrix-load.frag
│       │   ├── missing-first.frag
│       │   ├── missing-last-element.frag
│       │   ├── out-of-order-load.frag
│       │   ├── store-after-load.frag
│       │   └── uint4-array-load.frag
│       ├── asm/
│       │   ├── bfi.bc.dxil
│       │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
│       │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
│       │   ├── constant-struct-aggregate.bc.dxil
│       │   ├── control-flow-multi-break-with-non-idom-loop-header.dxil
│       │   ├── ibfe.bc.dxil
│       │   └── ubfe.bc.dxil
│       ├── auto-barrier/
│       │   ├── complex-loop.auto-group-shared-barrier.comp
│       │   ├── inner-to-inner.auto-group-shared-barrier.comp
│       │   ├── inner-to-outer.auto-group-shared-barrier.comp
│       │   ├── outer-to-inner.auto-group-shared-barrier.comp
│       │   ├── single-block-loop.auto-group-shared-barrier.comp
│       │   └── single-block.auto-group-shared-barrier.comp
│       ├── control-flow/
│       │   ├── branch-return-2.comp
│       │   ├── branch-return.comp
│       │   ├── branch.comp
│       │   ├── conditional-break-into-if-else-if-ladder-2.comp
│       │   ├── conditional-break-into-if-else-if-ladder.comp
│       │   ├── dual-inner-loop-early-return.comp
│       │   ├── if-else-if-into-continue.comp
│       │   ├── inner-loop-early-return.comp
│       │   ├── interleaved-unrolled-loop-breaks.comp
│       │   ├── loop-break-2.comp
│       │   ├── loop-break.comp
│       │   ├── loop-continue-2.comp
│       │   ├── loop-continue-3.comp
│       │   ├── loop-continue.comp
│       │   ├── loop-inside-infinite-loop-2.frag
│       │   ├── loop-inside-infinite-loop.frag
│       │   ├── loop-return.comp
│       │   ├── loop.comp
│       │   ├── nested-loop-break-2.comp
│       │   ├── nested-loop-break.comp
│       │   ├── nested-loop.comp
│       │   ├── selection-merge-split-post-domination.frag
│       │   ├── switch-continue.frag
│       │   ├── switch-merge-into-other-merge.comp
│       │   ├── switch-shared-header-with-loop.comp
│       │   └── wave-size-dependent-loop-unroll.comp
│       ├── descriptor_qa/
│       │   ├── acceleration-structure.bindless.descriptor-qa.rgen
│       │   ├── acceleration-structure.bindless.descriptor-qa.sm66.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.descriptor-qa.rgen
│       │   ├── descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp
│       │   ├── descriptor_qa.bindless.descriptor-qa.comp
│       │   ├── descriptor_qa.bindless.descriptor-qa.sm66.comp
│       │   ├── descriptor_qa.bindless.ssbo.descriptor-qa.comp
│       │   ├── early-2.bindless.descriptor-qa.frag
│       │   ├── early-3.bindless.descriptor-qa.frag
│       │   ├── early-4.bindless.descriptor-qa.frag
│       │   ├── early-5.bindless.descriptor-qa.frag
│       │   ├── early-heap.descriptor-qa.sm66.frag
│       │   └── early.bindless.descriptor-qa.frag
│       ├── dxil-builtin/
│       │   ├── accept-hit-and-end-search-ignore-hit.rany
│       │   ├── acos.frag
│       │   ├── asin.frag
│       │   ├── atan.frag
│       │   ├── atomic-bin-op.bindless.root-constant.frag
│       │   ├── atomic-bin-op.frag
│       │   ├── atomic-bin-op.root-descriptor.comp
│       │   ├── atomic-bin-op.ssbo.frag
│       │   ├── atomic-compare-exchange.frag
│       │   ├── atomic-compare-exchange.root-descriptor.comp
│       │   ├── atomic-compare-exchange.ssbo.frag
│       │   ├── attributes.denorm-ftz.comp
│       │   ├── attributes.denorm-preserve.comp
│       │   ├── barrier.comp
│       │   ├── barycentrics-2.frag
│       │   ├── barycentrics.frag
│       │   ├── bfrev.frag
│       │   ├── bitcount-bitrev-sizes.ssbo.comp
│       │   ├── buffer-load-feedback.frag
│       │   ├── buffer-load-signed-feedback.frag
│       │   ├── buffer-load-signed.frag
│       │   ├── buffer-load.frag
│       │   ├── buffer-load.ssbo.frag
│       │   ├── buffer-store-signed.frag
│       │   ├── buffer-store.frag
│       │   ├── buffer-store.ssbo.frag
│       │   ├── buffer-update-counter.frag
│       │   ├── calculate-lod.frag
│       │   ├── call-shader.rgen
│       │   ├── clip.demote-to-helper.frag
│       │   ├── clip.frag
│       │   ├── compute-shader-derivatives-cube-array.noderivs.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives-cube.noderivs.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives-single-thread.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives.noderivs.sm66.ssbo.comp
│       │   ├── cos.frag
│       │   ├── countbits.frag
│       │   ├── coverage.frag
│       │   ├── derivative.frag
│       │   ├── derivative.sm60.frag
│       │   ├── derivative.sm60.native-fp16.frag
│       │   ├── derivatives.sm66.comp
│       │   ├── discard.demote-to-helper.frag
│       │   ├── discard.frag
│       │   ├── dispatch-rays-dimensions.rgen
│       │   ├── dispatch-rays-index.rgen
│       │   ├── dot2.frag
│       │   ├── dot3.frag
│       │   ├── dot4.frag
│       │   ├── eval-centroid.frag
│       │   ├── eval-sample-index.frag
│       │   ├── eval-snapped.frag
│       │   ├── exp.frag
│       │   ├── f16-to-f32.frag
│       │   ├── f32-to-f16.frag
│       │   ├── fabs.frag
│       │   ├── firstbithi-16.sm62.frag
│       │   ├── firstbithi-64.frag
│       │   ├── firstbithi.frag
│       │   ├── firstbitlo-16.sm62.frag
│       │   ├── firstbitlo-64.frag
│       │   ├── firstbitlo.frag
│       │   ├── firstbitshi-16.sm62.frag
│       │   ├── firstbitshi-64.frag
│       │   ├── firstbitshi.frag
│       │   ├── flattened_thread_id_in_group.comp
│       │   ├── fma.frag
│       │   ├── fmad-precise.frag
│       │   ├── fmad.frag
│       │   ├── fmax.frag
│       │   ├── fmin.frag
│       │   ├── frc.frag
│       │   ├── get-dimensions-w-only.frag
│       │   ├── get-dimensions-xyz-only.frag
│       │   ├── get-dimensions.bindless.root-constant.frag
│       │   ├── get-dimensions.bindless.root-constant.ssbo.frag
│       │   ├── get-dimensions.frag
│       │   ├── get-dimensions.ssbo.frag
│       │   ├── group_id.comp
│       │   ├── hcos.frag
│       │   ├── hsin.frag
│       │   ├── htan.frag
│       │   ├── imad.frag
│       │   ├── imax.frag
│       │   ├── imin.frag
│       │   ├── instance-id.vert
│       │   ├── is-helper-lane-2.demote-to-helper.sm66.frag
│       │   ├── is-helper-lane-2.sm66.frag
│       │   ├── is-helper-lane.demote-to-helper.sm66.frag
│       │   ├── is-helper-lane.sm66.frag
│       │   ├── isfinite.frag
│       │   ├── isinf.frag
│       │   ├── isnan.frag
│       │   ├── log.frag
│       │   ├── make-double.frag
│       │   ├── msaa-uav.sm67.comp
│       │   ├── msad.comp
│       │   ├── object-ray-direction.rany
│       │   ├── object-ray-origin.rany
│       │   ├── object-to-world-3x4.rany
│       │   ├── object-to-world-4x3.rany
│       │   ├── pack-unpack.ssbo.sm66.comp
│       │   ├── quad-all-any.sm67.comp
│       │   ├── quad-all-any.sm67.quad-maximal-reconvergence.noglsl.comp
│       │   ├── quad-read-at-2d.comp
│       │   ├── quad-read-at-2d.sm66.comp
│       │   ├── quad-read-at.comp
│       │   ├── quad-read-at.frag
│       │   ├── quad-swap.comp
│       │   ├── quad-swap.frag
│       │   ├── raw-gather-offset-sparse.sm67.ssbo.comp
│       │   ├── raw-gather-offset.sm67.ssbo.comp
│       │   ├── raw-gather-sparse.sm67.ssbo.comp
│       │   ├── raw-gather.sm67.ssbo.comp
│       │   ├── ray-query-phi-multi.invalid.sm66.comp
│       │   ├── ray-query-phi-simple.sm66.comp
│       │   ├── ray-query-select-multi.invalid.sm66.comp
│       │   ├── ray-query-select-simple.sm66.comp
│       │   ├── ray-query-store-multi.invalid.sm66.comp
│       │   ├── ray-query-store-simple.sm66.comp
│       │   ├── ray-query.comp
│       │   ├── ray-t-current.rany
│       │   ├── ray-t-min.rany
│       │   ├── render-target-sample-count.frag
│       │   ├── render-target-sample-position.frag
│       │   ├── report-hit.rint
│       │   ├── round-ne.frag
│       │   ├── round-ni.frag
│       │   ├── round-pi.frag
│       │   ├── round-z.frag
│       │   ├── rsqrt.frag
│       │   ├── rt-geometry-index.rany
│       │   ├── rt-hit-kind.rany
│       │   ├── rt-instance-id.rany
│       │   ├── rt-instance-index.rany
│       │   ├── rt-primitive-index.rany
│       │   ├── rt-ray-flags.rany
│       │   ├── sample-bias-feedback.frag
│       │   ├── sample-bias-offset.frag
│       │   ├── sample-bias.frag
│       │   ├── sample-cmp-bias-feedback.frag
│       │   ├── sample-cmp-bias-offset.frag
│       │   ├── sample-cmp-bias.frag
│       │   ├── sample-cmp-feedback.frag
│       │   ├── sample-cmp-grad-offset-feedback.frag
│       │   ├── sample-cmp-grad-offset.frag
│       │   ├── sample-cmp-grad.frag
│       │   ├── sample-cmp-level.sm67.noglsl.frag
│       │   ├── sample-cmp-levelzero.frag
│       │   ├── sample-cmp-offset-levelzero-feedback.frag
│       │   ├── sample-cmp-offset-levelzero.frag
│       │   ├── sample-cmp-offset.frag
│       │   ├── sample-cmp.frag
│       │   ├── sample-grad-offset-dynamic.noglsl.invalid.sm67.frag
│       │   ├── sample-grad-offset-feedback.frag
│       │   ├── sample-grad-offset.frag
│       │   ├── sample-grad.frag
│       │   ├── sample-id.frag
│       │   ├── sample-level-offset-feedback.frag
│       │   ├── sample-level-offset.frag
│       │   ├── sample-level.frag
│       │   ├── sample-offset-dynamic.noglsl.invalid.sm67.frag
│       │   ├── sample-offset.frag
│       │   ├── sample.frag
│       │   ├── saturate.frag
│       │   ├── sin.frag
│       │   ├── sm64-packed-arithmetic.ssbo.comp
│       │   ├── sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp
│       │   ├── sm64-packed-arithmetic.ssbo.mixed-float-dot-product.noglsl.comp
│       │   ├── split-double.frag
│       │   ├── sqrt.frag
│       │   ├── tan.frag
│       │   ├── texture-gather-4offset.frag
│       │   ├── texture-gather-cmp-offset-feedback.frag
│       │   ├── texture-gather-cmp-offset.frag
│       │   ├── texture-gather-cmp.frag
│       │   ├── texture-gather-offset.frag
│       │   ├── texture-gather-signed-feedback.frag
│       │   ├── texture-gather-signed.frag
│       │   ├── texture-gather.frag
│       │   ├── texture-load-feedback.frag
│       │   ├── texture-load-offset-dynamic.sm67.frag
│       │   ├── texture-load-offset.frag
│       │   ├── texture-load-signed.frag
│       │   ├── texture-load.frag
│       │   ├── texture-store-signed.frag
│       │   ├── texture-store.frag
│       │   ├── texture2dms-sample-position.frag
│       │   ├── thread_id.comp
│       │   ├── thread_id_in_group.comp
│       │   ├── trace-ray-flags-2.rgen
│       │   ├── trace-ray-flags.rgen
│       │   ├── trace-ray.rgen
│       │   ├── umad.frag
│       │   ├── umax.frag
│       │   ├── umin.frag
│       │   ├── vertex-id.vert
│       │   ├── wave-active-all-true.comp
│       │   ├── wave-active-all-true.frag
│       │   ├── wave-active-any-true.comp
│       │   ├── wave-active-any-true.frag
│       │   ├── wave-active-ballot-discard.demote-to-helper.frag
│       │   ├── wave-active-ballot-discard.frag
│       │   ├── wave-active-ballot.comp
│       │   ├── wave-active-ballot.demote-to-helper.frag
│       │   ├── wave-active-ballot.frag
│       │   ├── wave-active-count-bits.comp
│       │   ├── wave-active-count-bits.frag
│       │   ├── wave-all-equal.comp
│       │   ├── wave-all-equal.frag
│       │   ├── wave-get-lane-count.comp
│       │   ├── wave-get-lane-index.comp
│       │   ├── wave-is-first-lane.comp
│       │   ├── wave-is-first-lane.frag
│       │   ├── wave-match.comp
│       │   ├── wave-match.frag
│       │   ├── wave-match.partitioned.noglsl.comp
│       │   ├── wave-match.partitioned.noglsl.frag
│       │   ├── wave-multi-prefix-count-bits.comp
│       │   ├── wave-multi-prefix-count-bits.frag
│       │   ├── wave-multi-prefix-op.comp
│       │   ├── wave-multi-prefix-op.frag
│       │   ├── wave-multi-prefix-op.partitioned.noglsl.comp
│       │   ├── wave-multi-prefix-op.partitioned.noglsl.frag
│       │   ├── wave-prefix.comp
│       │   ├── wave-prefix.frag
│       │   ├── wave-read-lane-at-optimizations.comp
│       │   ├── wave-read-lane-at.comp
│       │   ├── wave-read-lane-first.comp
│       │   ├── wave-read-lane-first.frag
│       │   ├── wave-reduce-helpers.sm67.frag
│       │   ├── wave-reduce-helpers.sm67.quad-maximal-reconvergence.frag
│       │   ├── wave-reduce.comp
│       │   ├── wave-reduce.frag
│       │   ├── wave-size.sm66.comp
│       │   ├── world-ray-direction.rany
│       │   ├── world-ray-origin.rany
│       │   ├── world-to-object-3x4.rany
│       │   └── world-to-object-4x3.rany
│       ├── fp16/
│       │   ├── saturate.frag
│       │   ├── saturate.sm60.frag
│       │   └── saturate.sm60.native-fp16.frag
│       ├── heap-robustness/
│       │   ├── misc.bindless.heap-raw-va-cbv.sm66.ssbo.comp
│       │   ├── misc.bindless.heap-robustness.heap-robustness-cbv.sm66.ssbo.comp
│       │   ├── misc.bindless.heap-robustness.sm66.ssbo.comp
│       │   ├── misc.bindless.sm66.ssbo.comp
│       │   └── misc.heap-robustness.bindless.heap-robustness-cbv.heap-raw-va-cbv.sm66.ssbo.comp
│       ├── instrumentation/
│       │   ├── atomics-raw.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-raw.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured-counter.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── atomics-typed.bindless.bda-instrumentation.ssbo.comp
│       │   ├── cbv.bindless.bda-instrumentation.comp
│       │   ├── cbv.root-descriptor.bda-instrumentation.comp
│       │   ├── raw.bindless.bda-instrumentation.ssbo.comp
│       │   ├── raw.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── structured.bindless.bda-instrumentation.comp
│       │   ├── structured.bindless.bda-instrumentation.ssbo.comp
│       │   ├── structured.root-descriptor.bda-instrumentation.ssbo.comp
│       │   └── typed.bindless.bda-instrumentation.comp
│       ├── llvm-builtin/
│       │   ├── alloca-robustness-cases.extended-robustness.vert
│       │   ├── alloca.frag
│       │   ├── atomic-bin-op.comp
│       │   ├── atomic-compare-exchange.comp
│       │   ├── atomic-compare-exchange.sm66.ssbo.comp
│       │   ├── bool-to-fp.frag
│       │   ├── constant-expression-cast.comp
│       │   ├── constant-expression-gep.comp
│       │   ├── fadd.frag
│       │   ├── fast-mul-div-pair.comp
│       │   ├── fcmp_eq.frag
│       │   ├── fcmp_ge.frag
│       │   ├── fcmp_gt.frag
│       │   ├── fcmp_le.frag
│       │   ├── fcmp_lt.frag
│       │   ├── fcmp_ne.frag
│       │   ├── fdiv.frag
│       │   ├── fmul.frag
│       │   ├── frem.frag
│       │   ├── fsub.frag
│       │   ├── glitched-integer-width.comp
│       │   ├── groupshared.comp
│       │   ├── icmp_eq.frag
│       │   ├── icmp_ne.frag
│       │   ├── icmp_sge.frag
│       │   ├── icmp_sgt.frag
│       │   ├── icmp_sle.frag
│       │   ├── icmp_slt.frag
│       │   ├── icmp_uge.frag
│       │   ├── icmp_ugt.frag
│       │   ├── icmp_ule.frag
│       │   ├── icmp_ult.frag
│       │   ├── logical-and.frag
│       │   ├── logical-equal.frag
│       │   ├── logical-not-equal.frag
│       │   ├── logical-or.frag
│       │   ├── lut.frag
│       │   ├── min16-phi.sm60.comp
│       │   ├── precise_math.frag
│       │   └── zext-bool.frag
│       ├── memory-model/
│       │   ├── uav-coherent-promotion.bindless.ssbo.comp
│       │   ├── uav-coherent-promotion.root-descriptor.ssbo.comp
│       │   ├── uav-coherent-promotion.sm66.bindless.ssbo.comp
│       │   ├── uav-coherent-promotion.sm66.ssbo.comp
│       │   ├── uav-coherent-promotion.ssbo.comp
│       │   ├── uav-coherent.root-descriptor.ssbo.comp
│       │   ├── uav-coherent.sm66.ssbo.comp
│       │   └── uav-coherent.ssbo.comp
│       ├── nvapi/
│       │   ├── bringup.nvapi.ssbo.rgen
│       │   ├── get-special-global-timer.nvapi.ssbo.rgen
│       │   ├── hit-object.local-root-signature.noglsl.nvapi.ssbo.rgen
│       │   ├── ray-query-cluster-id.nvapi.comp
│       │   ├── rt-cluster-id.nvapi.rany
│       │   └── shuffle.nvapi.ssbo.comp
│       ├── opts/
│       │   ├── fp16-fp32-fp16-1.ssbo.comp
│       │   ├── sabs.frag
│       │   ├── sneg.frag
│       │   ├── wave-read-lane-first-heap.sm66.comp
│       │   ├── wave-read-lane-first.bindless.local-root-signature.rmiss
│       │   ├── wave-read-lane-first.comp
│       │   ├── wave-read-lane-first.no-legacy-cbuf-layout.comp
│       │   ├── wave-read-lane-first.no-legacy-cbuf-layout.sm60.comp
│       │   ├── wave-read-lane-first.sm60.comp
│       │   ├── wave-read-lane-first.sm66.comp
│       │   ├── wave-read-lane-first.ssbo.comp
│       │   ├── wave-read-lane-first.ssbo.rgen
│       │   ├── wave-read-lane-first.ssbo.sm60.comp
│       │   ├── wave-read-lane-first.ssbo.sm66.comp
│       │   └── wave-read-lane-first.ssbo.sm66.rgen
│       ├── raw-access/
│       │   ├── bab-double1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float4x4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float4x4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-half1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.comp
│       │   ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp
│       │   ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.comp
│       │   ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp
│       │   ├── structured-uint1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-uint2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-uint3.raw-access-chains.noglsl.ssbo.comp
│       │   └── structured-uint4.raw-access-chains.noglsl.ssbo.comp
│       ├── resources/
│       │   ├── acceleration-structure.bindless.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.raw-va-stride-offset.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.rgen
│       │   ├── acceleration-structure.local-root-signature.root-descriptor.rgen
│       │   ├── basic.input-attachment.frag
│       │   ├── buffer-16bit.ssbo.bindless.comp
│       │   ├── buffer-16bit.ssbo.bindless.ssbo-align.comp
│       │   ├── buffer-16bit.ssbo.comp
│       │   ├── buffer-64bit.ssbo.bindless.ssbo-align.comp
│       │   ├── buffer-64bit.ssbo.comp
│       │   ├── buffer-alignment-fixup.bindless.root-constant.offset-layout.typed-buffer-offset.comp
│       │   ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.comp
│       │   ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.offset-layout.typed-buffer-offset.comp
│       │   ├── cbuf.root-constant.min16float.sm60.frag
│       │   ├── cbuf.root-constant.min16float.sm60.native-fp16.frag
│       │   ├── cbuf.root-constant.min16int.sm60.frag
│       │   ├── cbuf.root-constant.min16int.sm60.native-fp16.frag
│       │   ├── cbv-array-nonuniform.frag
│       │   ├── cbv-array.frag
│       │   ├── cbv-dynamic.no-legacy-cbuf-layout.local-root-signature.rmiss
│       │   ├── cbv-indexing.frag
│       │   ├── cbv-indexing.sm66.frag
│       │   ├── cbv-legacy-fp16-fp64.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
│       │   ├── cbv-legacy-fp16-fp64.sm60.frag
│       │   ├── cbv-legacy-fp16-fp64.sm60.native-fp16.frag
│       │   ├── cbv.bindless.root-constant.cbv-as-ssbo.frag
│       │   ├── cbv.bindless.root-constant.frag
│       │   ├── cbv.frag
│       │   ├── cbv.no-legacy-cbuf-layout.bindless.frag
│       │   ├── cbv.no-legacy-cbuf-layout.index-divider.frag
│       │   ├── cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
│       │   ├── cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
│       │   ├── cbv.no-legacy-cbuf-layout.root-constant.frag
│       │   ├── cbv.root-constant.frag
│       │   ├── cbv.root-descriptor.no-legacy-cbuf-layout.frag
│       │   ├── combined-image-sampler-reuse.frag
│       │   ├── dynamic-root-constant.root-constant.bindless.root-descriptor.comp
│       │   ├── min16-alloca-groupshared.sm60.comp
│       │   ├── min16float-ssbo-dxr.ssbo.rgen
│       │   ├── raw-buffer-addressing.comp
│       │   ├── raw-buffer-addressing.ssbo.comp
│       │   ├── root-bda.root-descriptor.comp
│       │   ├── root-bda.root-descriptor.sm60.comp
│       │   ├── root-constant-with-bda.root-descriptor.root-constant.comp
│       │   ├── rt-resources.bindless.local-root-signature.rmiss
│       │   ├── rt-resources.bindless.rmiss
│       │   ├── rt-resources.rmiss
│       │   ├── sampler-array.frag
│       │   ├── sampler-indexing.frag
│       │   ├── sampler-indexing.sm66.frag
│       │   ├── sampler.bindless.root-constant.frag
│       │   ├── sm66/
│       │   │   ├── atomics-64bit-groupshared.ssbo.sm66.comp
│       │   │   ├── atomics-64bit.root-descriptor.sm66.comp
│       │   │   ├── atomics-64bit.ssbo.sm66.comp
│       │   │   ├── atomics-component-alias.sm66.comp
│       │   │   ├── atomics-typed-64bit-heap.sm66.comp
│       │   │   ├── atomics-typed-64bit.bindless.sm66.comp
│       │   │   ├── atomics-typed-64bit.sm66.comp
│       │   │   ├── binding-range-selection.bindless.sm66.comp
│       │   │   ├── binding-range-selection.sm66.comp
│       │   │   ├── buffer-64bit-double.ssbo.sm66.comp
│       │   │   ├── buffer-64bit.ssbo.sm66.comp
│       │   │   ├── buffer-64bit.ssbo.ssbo-align.sm66.comp
│       │   │   ├── cbuffer-heap.sm66.frag
│       │   │   ├── cbv.no-legacy-cbuf-layout.bindless.sm66.frag
│       │   │   ├── cbv.no-legacy-cbuf-layout.sm66.frag
│       │   │   ├── raw-buffer-heap.sm66.frag
│       │   │   ├── raw-buffer-heap.ssbo.sm66.frag
│       │   │   ├── raw-buffer-heap.typed-buffer-offset.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.bindless.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.sm66.frag
│       │   │   ├── raygen-heap.sm66.rgen
│       │   │   ├── raygen-heap.ssbo-rtas.raw-va-stride-offset.sm66.rgen
│       │   │   ├── raygen-heap.ssbo-rtas.sm66.rgen
│       │   │   ├── raygen.sm66.rgen
│       │   │   ├── raygen.ssbo-rtas.bindless.raw-va-stride-offset.sm66.rgen
│       │   │   ├── raygen.ssbo-rtas.bindless.sm66.rgen
│       │   │   ├── rw-typed-binding.sm66.frag
│       │   │   ├── rw-typed-heap.sm66.frag
│       │   │   ├── sampled-types-binding.sm66.frag
│       │   │   ├── sampled-types.sm66.frag
│       │   │   ├── sampler-binding.sm66.frag
│       │   │   ├── sampler-heap.sm66.frag
│       │   │   ├── structured-16bit-heap.ssbo.sm66.frag
│       │   │   ├── structured-16bit-heap.ssbo.ssbo-align.sm66.frag
│       │   │   ├── structured-buffer-heap.sm66.frag
│       │   │   ├── structured-buffer-heap.ssbo.sm66.frag
│       │   │   ├── structured-buffer-heap.ssbo.ssbo-align.sm66.frag
│       │   │   └── structured-buffer-heap.typed-buffer-offset.sm66.frag
│       │   ├── srv-array-raw-buffer-nonuniform.frag
│       │   ├── srv-array-raw-buffer.frag
│       │   ├── srv-array-structured-buffer-nonuniform.frag
│       │   ├── srv-array-structured-buffer.frag
│       │   ├── srv-array-texture-nonuniform.frag
│       │   ├── srv-array-texture.frag
│       │   ├── srv-array-typed-buffer-nonuniform.frag
│       │   ├── srv-array-typed-buffer.frag
│       │   ├── srv-indexing.frag
│       │   ├── srv-indexing.sm66.frag
│       │   ├── srv-raw-buffer.bindless.root-constant.frag
│       │   ├── srv-raw-buffer.bindless.root-constant.ssbo.frag
│       │   ├── srv-raw-buffer.ssbo.frag
│       │   ├── srv-structured-buffer.bindless.root-constant.frag
│       │   ├── srv-structured-buffer.bindless.root-constant.ssbo.frag
│       │   ├── srv-structured-buffer.ssbo.frag
│       │   ├── srv-texture.bindless.root-constant.frag
│       │   ├── srv-texture.bindless.root-constant.inline-ubo.frag
│       │   ├── srv-typed-buffer.bindless.root-constant.frag
│       │   ├── srv-uav-raw.typed-buffer-offset.comp
│       │   ├── srv-uav.typed-buffer-offset.comp
│       │   ├── ssbo-minprecision.sm60.native-fp16.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.native-fp16.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.root-descriptor.frag
│       │   ├── subobject-parsing.rgen
│       │   ├── typed-resources-16bit-sparse.frag
│       │   ├── typed-resources-16bit.bindless.frag
│       │   ├── typed-resources-16bit.frag
│       │   ├── typed-resources-16bit.sm60.bindless.frag
│       │   ├── typed-resources-16bit.sm60.frag
│       │   ├── typed-resources-16bit.sm60.native-fp16.bindless.frag
│       │   ├── typed-resources-16bit.sm60.native-fp16.frag
│       │   ├── uav-array-raw-buffer-nonuniform.frag
│       │   ├── uav-array-raw-buffer.frag
│       │   ├── uav-array-structured-buffer-nonuniform.frag
│       │   ├── uav-array-structured-buffer-nonuniform.ssbo.bindless.root-constant.frag
│       │   ├── uav-array-structured-buffer-nonuniform.ssbo.frag
│       │   ├── uav-array-structured-buffer.frag
│       │   ├── uav-array-texture-nonuniform.frag
│       │   ├── uav-array-texture.frag
│       │   ├── uav-array-typed-buffer-nonuniform.frag
│       │   ├── uav-array-typed-buffer.frag
│       │   ├── uav-counter-array.ssbo.frag
│       │   ├── uav-counter-array.ssbo.sm66.frag
│       │   ├── uav-counter-array.ssbo.sm66.uav-counter-ssbo.frag
│       │   ├── uav-counter-array.ssbo.uav-counter-ssbo.frag
│       │   ├── uav-counter-heap.sm66.bindless.ssbo.frag
│       │   ├── uav-counter-heap.sm66.uav-counter-ssbo.bindless.ssbo.frag
│       │   ├── uav-counter-heap.sm66.uav-counter-texel-buffer.bindless.ssbo.frag
│       │   ├── uav-counter.bindless.nobda.root-constant.comp
│       │   ├── uav-counter.bindless.nobda.root-constant.raw-va-stride-offset.comp
│       │   ├── uav-counter.bindless.root-constant.comp
│       │   ├── uav-counter.bindless.root-constant.raw-va-stride-offset.comp
│       │   ├── uav-counter.bindless.root-constant.raw-va-stride-offset.heap-raw-va-cbv.comp
│       │   ├── uav-counter.bindless.root-constant.uav-counter-ssbo.comp
│       │   ├── uav-counter.ssbo.comp
│       │   ├── uav-counter.ssbo.raw-va-stride-offset.comp
│       │   ├── uav-counter.ssbo.uav-counter-ssbo.comp
│       │   ├── uav-indexing.frag
│       │   ├── uav-indexing.sm66.frag
│       │   ├── uav-raw-buffer.bindless.root-constant.frag
│       │   ├── uav-raw-buffer.ssbo.frag
│       │   ├── uav-structured-buffer.bindless.root-constant.frag
│       │   ├── uav-typed-buffer.bindless.root-constant.frag
│       │   └── uav-typed.typed-uav-without-format.comp
│       ├── rov/
│       │   ├── rov-bab.bindless.frag
│       │   ├── rov-bab.frag
│       │   ├── rov-bab.ssbo.bindless.frag
│       │   ├── rov-bab.ssbo.frag
│       │   ├── rov-bab.ssbo.root-descriptor.frag
│       │   ├── rov-branch-early-return.frag
│       │   ├── rov-branch.frag
│       │   ├── rov-buffer.frag
│       │   ├── rov-inloop-2.frag
│       │   ├── rov-inloop.frag
│       │   ├── rov-per-sample.sm66.frag
│       │   ├── rov-postloop.frag
│       │   ├── rov-structured.bindless.frag
│       │   ├── rov-structured.frag
│       │   ├── rov-structured.ssbo.bindless.frag
│       │   ├── rov-structured.ssbo.frag
│       │   ├── rov-structured.ssbo.root-descriptor.frag
│       │   ├── rov-tex1d.bindless.frag
│       │   ├── rov-tex1d.frag
│       │   ├── rov-tex1darray.bindless.frag
│       │   ├── rov-tex1darray.frag
│       │   ├── rov-tex2d.bindless.frag
│       │   ├── rov-tex2d.frag
│       │   ├── rov-tex2darray.bindless.frag
│       │   ├── rov-tex2darray.frag
│       │   ├── rov-tex3d.bindless.frag
│       │   ├── rov-tex3d.frag
│       │   ├── rov-undef.frag
│       │   └── rov.sm66.frag
│       ├── sampler-feedback/
│       │   ├── sampler-feedback.frag
│       │   └── sampler-feedback.sm66.frag
│       ├── semantics/
│       │   ├── clip-cull-distance.vert
│       │   ├── clip-cull.frag
│       │   ├── clip-distance-cols.frag
│       │   ├── clip-distance-cols.vert
│       │   ├── clip-distance-flatten.frag
│       │   ├── clip-distance-flatten.vert
│       │   ├── clip-distance-rows.frag
│       │   ├── clip-distance-rows.vert
│       │   ├── clip-distance-single.vert
│       │   ├── coverage.frag
│       │   ├── depth-greater-equal.frag
│       │   ├── depth-less-equal.frag
│       │   ├── depth.frag
│       │   ├── early-depth-stencil.frag
│       │   ├── inner-coverage.noglsl.frag
│       │   ├── is-front-face.frag
│       │   ├── position-short.frag
│       │   ├── position-short.vert
│       │   ├── position.frag
│       │   ├── primitive-id.frag
│       │   ├── primitive-id.geom
│       │   ├── render-target-array-index.frag
│       │   ├── render-target-array-index.geom
│       │   ├── sample-rate-pos.frag
│       │   ├── stencil-ref.frag
│       │   ├── sv-shading-rate.noglsl.frag
│       │   ├── sv-shading-rate.noglsl.vert
│       │   ├── view-id.frag
│       │   ├── view-id.vert
│       │   ├── viewport-array-index.frag
│       │   └── viewport-array-index.geom
│       ├── stages/
│       │   ├── boolean-io.vert
│       │   ├── callable-chain.rcall
│       │   ├── callable.rcall
│       │   ├── closesthit.rclosest
│       │   ├── domain-clip-cull.tese
│       │   ├── domain-patch-input-integer-io.tese
│       │   ├── domain.tese
│       │   ├── extra_output.dual-source-blending.frag
│       │   ├── extra_output_reordered.dual-source-blending.frag
│       │   ├── geometry-clip-cull.geom
│       │   ├── geometry-input-line.geom
│       │   ├── geometry-input-lineadj.geom
│       │   ├── geometry-input-point.geom
│       │   ├── geometry-input-triangle.geom
│       │   ├── geometry-input-triangleadj.geom
│       │   ├── geometry-instancing.geom
│       │   ├── geometry-output-line.geom
│       │   ├── geometry-output-point.geom
│       │   ├── geometry-streams.geom
│       │   ├── hull-arrays.tesc
│       │   ├── hull-clip-cull.tesc
│       │   ├── hull-patch-output-integer-io.tesc
│       │   ├── hull-single-cp.tesc
│       │   ├── hull.tesc
│       │   ├── mesh-basic-line.mesh
│       │   ├── mesh-basic.mesh
│       │   ├── mesh-clip-cull.mesh
│       │   ├── raygen-complex-storage-class.rgen
│       │   ├── raygen-skip-inactive-resources.rgen
│       │   ├── raygen.rgen
│       │   ├── raymiss-chain.rmiss
│       │   ├── raymiss.rmiss
│       │   ├── simple.dual-source-blending.frag
│       │   ├── simple.invariant.vert
│       │   ├── stage-input-output.16bit-io.frag
│       │   ├── stage-input-output.frag
│       │   ├── stream-out.stream-out.vert
│       │   ├── swizzle.rt-swizzle.frag
│       │   ├── task-basic.task
│       │   ├── vertex-array-input.vert
│       │   ├── vertex-array-output.vert
│       │   └── vertex-input-remapping.vert
│       ├── vectorization/
│       │   ├── copy-byte-address.ssbo.comp
│       │   ├── copy-composite-2.ssbo.comp
│       │   ├── copy-composite.ssbo.comp
│       │   ├── copy-composite.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double2.ssbo.comp
│       │   ├── copy-double2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double3.ssbo.comp
│       │   ├── copy-double3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double4.ssbo.comp
│       │   ├── copy-float2.ssbo.comp
│       │   ├── copy-float2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-float2x2.ssbo.comp
│       │   ├── copy-float3.ssbo.comp
│       │   ├── copy-float3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-float4x4.ssbo.comp
│       │   ├── copy-half2.ssbo.comp
│       │   ├── copy-half2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-half3.ssbo.comp
│       │   ├── copy-half3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-half4.ssbo.comp
│       │   └── copy-half4.ssbo.ssbo-align.bindless.comp
│       ├── view-instancing/
│       │   ├── geom/
│       │   │   ├── basic.view-instancing.last-pre-raster.geom
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instance-mask.geom
│       │   │   ├── basic.view-instancing.view-instancing-multiview.last-pre-raster.geom
│       │   │   ├── basic.view-instancing.view-instancing-multiview.view-instancing-viewport-offset.last-pre-raster.geom
│       │   │   └── basic.view-instancing.view-instancing-viewport-offset.last-pre-raster.geom
│       │   ├── mesh/
│       │   │   ├── basic-export-viewport-layer.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic-export-viewport-layer.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh
│       │   │   ├── basic-few-thread.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic-many-thread.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instance-mask.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh
│       │   │   └── basic.view-instancing.mesh
│       │   ├── tesc/
│       │   │   ├── basic.view-instancing.tesc
│       │   │   └── basic.view-instancing.view-instancing-multiview.tesc
│       │   ├── tese/
│       │   │   ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.last-pre-raster.tese
│       │   │   ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.tese
│       │   │   ├── domain.view-instancing.last-pre-raster.tese
│       │   │   ├── domain.view-instancing.tese
│       │   │   ├── domain.view-instancing.view-instancing-multiview.last-pre-raster.tese
│       │   │   ├── domain.view-instancing.view-instancing-multiview.tese
│       │   │   ├── domain.view-instancing.view-instancing-viewport-offset.last-pre-raster.tese
│       │   │   └── domain.view-instancing.view-instancing-viewport-offset.tese
│       │   └── vert/
│       │       ├── basic.view-instancing.export-layer-viewport.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-layer-viewport.vert
│       │       ├── basic.view-instancing.export-layer-viewport.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-layer-viewport.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.export-layer.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-layer.vert
│       │       ├── basic.view-instancing.export-layer.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-layer.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.export-viewport.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-viewport.vert
│       │       ├── basic.view-instancing.export-viewport.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-viewport.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.last-pre-raster.vert
│       │       ├── basic.view-instancing.vert
│       │       ├── basic.view-instancing.view-instance-mask.last-pre-raster.vert
│       │       ├── basic.view-instancing.view-instance-mask.vert
│       │       ├── basic.view-instancing.view-instancing-multiview.vert
│       │       └── basic.view-instancing.view-instancing-viewport-offset.vert
│       └── vkmm/
│           ├── coopmat.sm66.ssbo.vkmm.comp
│           ├── cross_group_sharing.vkmm.node.inline-ubo.comp
│           ├── descriptor_qa.bindless.descriptor-qa.vkmm.comp
│           ├── groupshared.vkmm.comp
│           ├── hull.vkmm.tesc
│           ├── image-load-store.vkmm.comp
│           ├── image-load-store.vkmm.sm66.comp
│           ├── memory-model/
│           │   ├── uav-coherent-promotion.bindless.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.root-descriptor.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.sm66.bindless.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.sm66.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.ssbo.vkmm.comp
│           │   ├── uav-coherent.root-descriptor.ssbo.vkmm.comp
│           │   ├── uav-coherent.sm66.ssbo.vkmm.comp
│           │   └── uav-coherent.ssbo.vkmm.comp
│           ├── report-hit.vkmm.rint
│           ├── rov-structured.vkmm.frag
│           └── rov-tex2d.vkmm.frag
├── reference-dxbc/
│   ├── test_arithmetic_bool.asm
│   ├── test_arithmetic_bool.glsl
│   ├── test_arithmetic_fp16_compare.asm
│   ├── test_arithmetic_fp16_compare.glsl
│   ├── test_arithmetic_fp16_packing.asm
│   ├── test_arithmetic_fp16_packing.glsl
│   ├── test_arithmetic_fp16_packing_legacy.asm
│   ├── test_arithmetic_fp16_packing_legacy.glsl
│   ├── test_arithmetic_fp16_scalar.asm
│   ├── test_arithmetic_fp16_scalar.glsl
│   ├── test_arithmetic_fp16_vector.asm
│   ├── test_arithmetic_fp16_vector.glsl
│   ├── test_arithmetic_fp32.asm
│   ├── test_arithmetic_fp32.glsl
│   ├── test_arithmetic_fp32_compare.asm
│   ├── test_arithmetic_fp32_compare.glsl
│   ├── test_arithmetic_fp32_precise.asm
│   ├── test_arithmetic_fp32_precise.glsl
│   ├── test_arithmetic_fp32_special.asm
│   ├── test_arithmetic_fp32_special.glsl
│   ├── test_arithmetic_fp64.asm
│   ├── test_arithmetic_fp64.glsl
│   ├── test_arithmetic_fp64_compare.asm
│   ├── test_arithmetic_fp64_compare.glsl
│   ├── test_arithmetic_fp64_packing.asm
│   ├── test_arithmetic_fp64_packing.glsl
│   ├── test_arithmetic_int_extended.asm
│   ├── test_arithmetic_int_extended.glsl
│   ├── test_arithmetic_sint16_compare.asm
│   ├── test_arithmetic_sint16_compare.glsl
│   ├── test_arithmetic_sint16_scalar.asm
│   ├── test_arithmetic_sint16_scalar.glsl
│   ├── test_arithmetic_sint16_vector.asm
│   ├── test_arithmetic_sint16_vector.glsl
│   ├── test_arithmetic_sint32.asm
│   ├── test_arithmetic_sint32.glsl
│   ├── test_arithmetic_sint32_compare.asm
│   ├── test_arithmetic_sint32_compare.glsl
│   ├── test_arithmetic_uint16_compare.asm
│   ├── test_arithmetic_uint16_compare.glsl
│   ├── test_arithmetic_uint16_scalar.asm
│   ├── test_arithmetic_uint16_scalar.glsl
│   ├── test_arithmetic_uint16_vector.asm
│   ├── test_arithmetic_uint16_vector.glsl
│   ├── test_arithmetic_uint32.asm
│   ├── test_arithmetic_uint32.glsl
│   ├── test_arithmetic_uint32_compare.asm
│   ├── test_arithmetic_uint32_compare.glsl
│   ├── test_cfg_if.asm
│   ├── test_cfg_if.glsl
│   ├── test_cfg_if_else.asm
│   ├── test_cfg_if_else.glsl
│   ├── test_cfg_loop_infinite.asm
│   ├── test_cfg_loop_infinite.glsl
│   ├── test_cfg_loop_once.asm
│   ├── test_cfg_loop_once.glsl
│   ├── test_cfg_switch_complex.asm
│   ├── test_cfg_switch_complex.glsl
│   ├── test_cfg_switch_simple.asm
│   ├── test_cfg_switch_simple.glsl
│   ├── test_convert_f_to_f.asm
│   ├── test_convert_f_to_f.glsl
│   ├── test_convert_f_to_i.asm
│   ├── test_convert_f_to_i.glsl
│   ├── test_convert_i_to_f.asm
│   ├── test_convert_i_to_f.glsl
│   ├── test_convert_i_to_i.asm
│   ├── test_convert_i_to_i.glsl
│   ├── test_io_cs_builtins.asm
│   ├── test_io_cs_builtins.glsl
│   ├── test_io_ds_isoline.asm
│   ├── test_io_ds_isoline.glsl
│   ├── test_io_ds_quad.asm
│   ├── test_io_ds_quad.glsl
│   ├── test_io_ds_triangle.asm
│   ├── test_io_ds_triangle.glsl
│   ├── test_io_gs_basic_line.asm
│   ├── test_io_gs_basic_line.glsl
│   ├── test_io_gs_basic_line_adj.asm
│   ├── test_io_gs_basic_line_adj.glsl
│   ├── test_io_gs_basic_point.asm
│   ├── test_io_gs_basic_point.glsl
│   ├── test_io_gs_basic_triangle.asm
│   ├── test_io_gs_basic_triangle.glsl
│   ├── test_io_gs_basic_triangle_adj.asm
│   ├── test_io_gs_basic_triangle_adj.glsl
│   ├── test_io_gs_instanced.asm
│   ├── test_io_gs_instanced.glsl
│   ├── test_io_gs_multi_stream_xfb_raster_0.asm
│   ├── test_io_gs_multi_stream_xfb_raster_0.glsl
│   ├── test_io_gs_multi_stream_xfb_raster_1.asm
│   ├── test_io_gs_multi_stream_xfb_raster_1.glsl
│   ├── test_io_gs_xfb.asm
│   ├── test_io_gs_xfb.glsl
│   ├── test_io_hs_line.asm
│   ├── test_io_hs_line.glsl
│   ├── test_io_hs_point.asm
│   ├── test_io_hs_point.glsl
│   ├── test_io_hs_triangle_ccw.asm
│   ├── test_io_hs_triangle_ccw.glsl
│   ├── test_io_hs_triangle_cw.asm
│   ├── test_io_hs_triangle_cw.glsl
│   ├── test_io_ps_builtins.asm
│   ├── test_io_ps_builtins.glsl
│   ├── test_io_ps_export_depth.asm
│   ├── test_io_ps_export_depth.glsl
│   ├── test_io_ps_export_depth_greater.asm
│   ├── test_io_ps_export_depth_greater.glsl
│   ├── test_io_ps_export_depth_less.asm
│   ├── test_io_ps_export_depth_less.glsl
│   ├── test_io_ps_export_stencil.asm
│   ├── test_io_ps_export_stencil.glsl
│   ├── test_io_ps_fully_covered.asm
│   ├── test_io_ps_fully_covered.glsl
│   ├── test_io_ps_interpolate_centroid.asm
│   ├── test_io_ps_interpolate_centroid.glsl
│   ├── test_io_ps_interpolate_offset.asm
│   ├── test_io_ps_interpolate_offset.glsl
│   ├── test_io_ps_interpolate_sample.asm
│   ├── test_io_ps_interpolate_sample.glsl
│   ├── test_io_vs.asm
│   ├── test_io_vs.glsl
│   ├── test_io_vs_clip_cull_dist.asm
│   ├── test_io_vs_clip_cull_dist.glsl
│   ├── test_io_vs_clip_dist.asm
│   ├── test_io_vs_clip_dist.glsl
│   ├── test_io_vs_cull_dist.asm
│   ├── test_io_vs_cull_dist.glsl
│   ├── test_io_vs_instance_id.asm
│   ├── test_io_vs_instance_id.glsl
│   ├── test_io_vs_layer.asm
│   ├── test_io_vs_layer.glsl
│   ├── test_io_vs_vertex_id.asm
│   ├── test_io_vs_vertex_id.glsl
│   ├── test_io_vs_viewport.asm
│   ├── test_io_vs_viewport.glsl
│   ├── test_misc_constant_load.asm
│   ├── test_misc_constant_load.glsl
│   ├── test_misc_function.asm
│   ├── test_misc_function.glsl
│   ├── test_misc_function_with_args.asm
│   ├── test_misc_function_with_args.glsl
│   ├── test_misc_function_with_return.asm
│   ├── test_misc_function_with_return.glsl
│   ├── test_misc_function_with_undef.asm
│   ├── test_misc_function_with_undef.glsl
│   ├── test_misc_lds.asm
│   ├── test_misc_lds.glsl
│   ├── test_misc_lds_atomic.asm
│   ├── test_misc_lds_atomic.glsl
│   ├── test_misc_ps_demote.asm
│   ├── test_misc_ps_demote.glsl
│   ├── test_misc_ps_early_z.asm
│   ├── test_misc_ps_early_z.glsl
│   ├── test_misc_scratch.asm
│   ├── test_misc_scratch.glsl
│   ├── test_resource_rov.asm
│   ├── test_resource_rov.glsl
│   ├── test_resource_srv_buffer_load_sparse_feedback.asm
│   ├── test_resource_srv_buffer_load_sparse_feedback.glsl
│   ├── test_resource_srv_image_1d_array_load.asm
│   ├── test_resource_srv_image_1d_array_load.glsl
│   ├── test_resource_srv_image_1d_array_query.asm
│   ├── test_resource_srv_image_1d_array_query.glsl
│   ├── test_resource_srv_image_1d_array_sample.asm
│   ├── test_resource_srv_image_1d_array_sample.glsl
│   ├── test_resource_srv_image_1d_load.asm
│   ├── test_resource_srv_image_1d_load.glsl
│   ├── test_resource_srv_image_1d_query.asm
│   ├── test_resource_srv_image_1d_query.glsl
│   ├── test_resource_srv_image_1d_sample.asm
│   ├── test_resource_srv_image_1d_sample.glsl
│   ├── test_resource_srv_image_2d_array_gather.asm
│   ├── test_resource_srv_image_2d_array_gather.glsl
│   ├── test_resource_srv_image_2d_array_gather_depth.asm
│   ├── test_resource_srv_image_2d_array_gather_depth.glsl
│   ├── test_resource_srv_image_2d_array_load.asm
│   ├── test_resource_srv_image_2d_array_load.glsl
│   ├── test_resource_srv_image_2d_array_query.asm
│   ├── test_resource_srv_image_2d_array_query.glsl
│   ├── test_resource_srv_image_2d_array_sample.asm
│   ├── test_resource_srv_image_2d_array_sample.glsl
│   ├── test_resource_srv_image_2d_array_sample_depth.asm
│   ├── test_resource_srv_image_2d_array_sample_depth.glsl
│   ├── test_resource_srv_image_2d_gather.asm
│   ├── test_resource_srv_image_2d_gather.glsl
│   ├── test_resource_srv_image_2d_gather_depth.asm
│   ├── test_resource_srv_image_2d_gather_depth.glsl
│   ├── test_resource_srv_image_2d_load.asm
│   ├── test_resource_srv_image_2d_load.glsl
│   ├── test_resource_srv_image_2d_ms_array_load.asm
│   ├── test_resource_srv_image_2d_ms_array_load.glsl
│   ├── test_resource_srv_image_2d_ms_array_query.asm
│   ├── test_resource_srv_image_2d_ms_array_query.glsl
│   ├── test_resource_srv_image_2d_ms_load.asm
│   ├── test_resource_srv_image_2d_ms_load.glsl
│   ├── test_resource_srv_image_2d_ms_query.asm
│   ├── test_resource_srv_image_2d_ms_query.glsl
│   ├── test_resource_srv_image_2d_query.asm
│   ├── test_resource_srv_image_2d_query.glsl
│   ├── test_resource_srv_image_2d_sample.asm
│   ├── test_resource_srv_image_2d_sample.glsl
│   ├── test_resource_srv_image_2d_sample_depth.asm
│   ├── test_resource_srv_image_2d_sample_depth.glsl
│   ├── test_resource_srv_image_3d_load.asm
│   ├── test_resource_srv_image_3d_load.glsl
│   ├── test_resource_srv_image_3d_query.asm
│   ├── test_resource_srv_image_3d_query.glsl
│   ├── test_resource_srv_image_3d_sample.asm
│   ├── test_resource_srv_image_3d_sample.glsl
│   ├── test_resource_srv_image_cube_array_gather.asm
│   ├── test_resource_srv_image_cube_array_gather.glsl
│   ├── test_resource_srv_image_cube_array_gather_depth.asm
│   ├── test_resource_srv_image_cube_array_gather_depth.glsl
│   ├── test_resource_srv_image_cube_array_query.asm
│   ├── test_resource_srv_image_cube_array_query.glsl
│   ├── test_resource_srv_image_cube_array_sample.asm
│   ├── test_resource_srv_image_cube_array_sample.glsl
│   ├── test_resource_srv_image_cube_array_sample_depth.asm
│   ├── test_resource_srv_image_cube_array_sample_depth.glsl
│   ├── test_resource_srv_image_cube_gather.asm
│   ├── test_resource_srv_image_cube_gather.glsl
│   ├── test_resource_srv_image_cube_gather_depth.asm
│   ├── test_resource_srv_image_cube_gather_depth.glsl
│   ├── test_resource_srv_image_cube_query.asm
│   ├── test_resource_srv_image_cube_query.glsl
│   ├── test_resource_srv_image_cube_sample.asm
│   ├── test_resource_srv_image_cube_sample.glsl
│   ├── test_resource_srv_image_cube_sample_depth.asm
│   ├── test_resource_srv_image_cube_sample_depth.glsl
│   ├── test_resource_srv_image_gather_depth_sparse_feedback.asm
│   ├── test_resource_srv_image_gather_depth_sparse_feedback.glsl
│   ├── test_resource_srv_image_gather_sparse_feedback.asm
│   ├── test_resource_srv_image_gather_sparse_feedback.glsl
│   ├── test_resource_srv_image_load_sparse_feedback.asm
│   ├── test_resource_srv_image_load_sparse_feedback.glsl
│   ├── test_resource_srv_image_sample_depth_sparse_feedback.asm
│   ├── test_resource_srv_image_sample_depth_sparse_feedback.glsl
│   ├── test_resource_srv_image_sample_sparse_feedback.asm
│   ├── test_resource_srv_image_sample_sparse_feedback.glsl
│   ├── test_resource_srv_indexed_image_1d_array_load.asm
│   ├── test_resource_srv_indexed_image_1d_array_load.glsl
│   ├── test_resource_srv_indexed_image_1d_array_query.asm
│   ├── test_resource_srv_indexed_image_1d_array_query.glsl
│   ├── test_resource_srv_indexed_image_1d_array_sample.asm
│   ├── test_resource_srv_indexed_image_1d_array_sample.glsl
│   ├── test_resource_srv_indexed_image_1d_load.asm
│   ├── test_resource_srv_indexed_image_1d_load.glsl
│   ├── test_resource_srv_indexed_image_1d_query.asm
│   ├── test_resource_srv_indexed_image_1d_query.glsl
│   ├── test_resource_srv_indexed_image_1d_sample.asm
│   ├── test_resource_srv_indexed_image_1d_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_array_gather.asm
│   ├── test_resource_srv_indexed_image_2d_array_gather.glsl
│   ├── test_resource_srv_indexed_image_2d_array_gather_depth.asm
│   ├── test_resource_srv_indexed_image_2d_array_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_array_load.asm
│   ├── test_resource_srv_indexed_image_2d_array_load.glsl
│   ├── test_resource_srv_indexed_image_2d_array_query.asm
│   ├── test_resource_srv_indexed_image_2d_array_query.glsl
│   ├── test_resource_srv_indexed_image_2d_array_sample.asm
│   ├── test_resource_srv_indexed_image_2d_array_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_array_sample_depth.asm
│   ├── test_resource_srv_indexed_image_2d_array_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_gather.asm
│   ├── test_resource_srv_indexed_image_2d_gather.glsl
│   ├── test_resource_srv_indexed_image_2d_gather_depth.asm
│   ├── test_resource_srv_indexed_image_2d_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_load.asm
│   ├── test_resource_srv_indexed_image_2d_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_array_load.asm
│   ├── test_resource_srv_indexed_image_2d_ms_array_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_array_query.asm
│   ├── test_resource_srv_indexed_image_2d_ms_array_query.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_load.asm
│   ├── test_resource_srv_indexed_image_2d_ms_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_query.asm
│   ├── test_resource_srv_indexed_image_2d_ms_query.glsl
│   ├── test_resource_srv_indexed_image_2d_query.asm
│   ├── test_resource_srv_indexed_image_2d_query.glsl
│   ├── test_resource_srv_indexed_image_2d_sample.asm
│   ├── test_resource_srv_indexed_image_2d_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_sample_depth.asm
│   ├── test_resource_srv_indexed_image_2d_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_3d_load.asm
│   ├── test_resource_srv_indexed_image_3d_load.glsl
│   ├── test_resource_srv_indexed_image_3d_query.asm
│   ├── test_resource_srv_indexed_image_3d_query.glsl
│   ├── test_resource_srv_indexed_image_3d_sample.asm
│   ├── test_resource_srv_indexed_image_3d_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_array_gather.asm
│   ├── test_resource_srv_indexed_image_cube_array_gather.glsl
│   ├── test_resource_srv_indexed_image_cube_array_gather_depth.asm
│   ├── test_resource_srv_indexed_image_cube_array_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_array_query.asm
│   ├── test_resource_srv_indexed_image_cube_array_query.glsl
│   ├── test_resource_srv_indexed_image_cube_array_sample.asm
│   ├── test_resource_srv_indexed_image_cube_array_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_array_sample_depth.asm
│   ├── test_resource_srv_indexed_image_cube_array_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_gather.asm
│   ├── test_resource_srv_indexed_image_cube_gather.glsl
│   ├── test_resource_srv_indexed_image_cube_gather_depth.asm
│   ├── test_resource_srv_indexed_image_cube_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_query.asm
│   ├── test_resource_srv_indexed_image_cube_query.glsl
│   ├── test_resource_srv_indexed_image_cube_sample.asm
│   ├── test_resource_srv_indexed_image_cube_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_sample_depth.asm
│   ├── test_resource_srv_indexed_image_cube_sample_depth.glsl
│   ├── test_resource_uav_buffer_load_sparse_feedback.asm
│   ├── test_resource_uav_buffer_load_sparse_feedback.glsl
│   ├── test_resource_uav_counter.asm
│   ├── test_resource_uav_counter.glsl
│   ├── test_resource_uav_counter_indexed.asm
│   ├── test_resource_uav_counter_indexed.glsl
│   ├── test_resource_uav_image_1d_array_atomic.asm
│   ├── test_resource_uav_image_1d_array_atomic.glsl
│   ├── test_resource_uav_image_1d_array_load.asm
│   ├── test_resource_uav_image_1d_array_load.glsl
│   ├── test_resource_uav_image_1d_array_query.asm
│   ├── test_resource_uav_image_1d_array_query.glsl
│   ├── test_resource_uav_image_1d_array_store.asm
│   ├── test_resource_uav_image_1d_array_store.glsl
│   ├── test_resource_uav_image_1d_atomic.asm
│   ├── test_resource_uav_image_1d_atomic.glsl
│   ├── test_resource_uav_image_1d_load.asm
│   ├── test_resource_uav_image_1d_load.glsl
│   ├── test_resource_uav_image_1d_query.asm
│   ├── test_resource_uav_image_1d_query.glsl
│   ├── test_resource_uav_image_1d_store.asm
│   ├── test_resource_uav_image_1d_store.glsl
│   ├── test_resource_uav_image_2d_array_atomic.asm
│   ├── test_resource_uav_image_2d_array_atomic.glsl
│   ├── test_resource_uav_image_2d_array_load.asm
│   ├── test_resource_uav_image_2d_array_load.glsl
│   ├── test_resource_uav_image_2d_array_query.asm
│   ├── test_resource_uav_image_2d_array_query.glsl
│   ├── test_resource_uav_image_2d_array_store.asm
│   ├── test_resource_uav_image_2d_array_store.glsl
│   ├── test_resource_uav_image_2d_atomic.asm
│   ├── test_resource_uav_image_2d_atomic.glsl
│   ├── test_resource_uav_image_2d_load.asm
│   ├── test_resource_uav_image_2d_load.glsl
│   ├── test_resource_uav_image_2d_load_precise.asm
│   ├── test_resource_uav_image_2d_load_precise.glsl
│   ├── test_resource_uav_image_2d_query.asm
│   ├── test_resource_uav_image_2d_query.glsl
│   ├── test_resource_uav_image_2d_store.asm
│   ├── test_resource_uav_image_2d_store.glsl
│   ├── test_resource_uav_image_3d_atomic.asm
│   ├── test_resource_uav_image_3d_atomic.glsl
│   ├── test_resource_uav_image_3d_load.asm
│   ├── test_resource_uav_image_3d_load.glsl
│   ├── test_resource_uav_image_3d_query.asm
│   ├── test_resource_uav_image_3d_query.glsl
│   ├── test_resource_uav_image_3d_store.asm
│   ├── test_resource_uav_image_3d_store.glsl
│   ├── test_resource_uav_image_load_sparse_feedback.asm
│   ├── test_resource_uav_image_load_sparse_feedback.glsl
│   ├── test_resource_uav_indexed_image_1d_array_atomic.asm
│   ├── test_resource_uav_indexed_image_1d_array_atomic.glsl
│   ├── test_resource_uav_indexed_image_1d_array_load.asm
│   ├── test_resource_uav_indexed_image_1d_array_load.glsl
│   ├── test_resource_uav_indexed_image_1d_array_query.asm
│   ├── test_resource_uav_indexed_image_1d_array_query.glsl
│   ├── test_resource_uav_indexed_image_1d_array_store.asm
│   ├── test_resource_uav_indexed_image_1d_array_store.glsl
│   ├── test_resource_uav_indexed_image_1d_atomic.asm
│   ├── test_resource_uav_indexed_image_1d_atomic.glsl
│   ├── test_resource_uav_indexed_image_1d_load.asm
│   ├── test_resource_uav_indexed_image_1d_load.glsl
│   ├── test_resource_uav_indexed_image_1d_query.asm
│   ├── test_resource_uav_indexed_image_1d_query.glsl
│   ├── test_resource_uav_indexed_image_1d_store.asm
│   ├── test_resource_uav_indexed_image_1d_store.glsl
│   ├── test_resource_uav_indexed_image_2d_array_atomic.asm
│   ├── test_resource_uav_indexed_image_2d_array_atomic.glsl
│   ├── test_resource_uav_indexed_image_2d_array_load.asm
│   ├── test_resource_uav_indexed_image_2d_array_load.glsl
│   ├── test_resource_uav_indexed_image_2d_array_query.asm
│   ├── test_resource_uav_indexed_image_2d_array_query.glsl
│   ├── test_resource_uav_indexed_image_2d_array_store.asm
│   ├── test_resource_uav_indexed_image_2d_array_store.glsl
│   ├── test_resource_uav_indexed_image_2d_atomic.asm
│   ├── test_resource_uav_indexed_image_2d_atomic.glsl
│   ├── test_resource_uav_indexed_image_2d_load.asm
│   ├── test_resource_uav_indexed_image_2d_load.glsl
│   ├── test_resource_uav_indexed_image_2d_query.asm
│   ├── test_resource_uav_indexed_image_2d_query.glsl
│   ├── test_resource_uav_indexed_image_2d_store.asm
│   ├── test_resource_uav_indexed_image_2d_store.glsl
│   ├── test_resource_uav_indexed_image_3d_atomic.asm
│   ├── test_resource_uav_indexed_image_3d_atomic.glsl
│   ├── test_resource_uav_indexed_image_3d_load.asm
│   ├── test_resource_uav_indexed_image_3d_load.glsl
│   ├── test_resource_uav_indexed_image_3d_query.asm
│   ├── test_resource_uav_indexed_image_3d_query.glsl
│   ├── test_resource_uav_indexed_image_3d_store.asm
│   ├── test_resource_uav_indexed_image_3d_store.glsl
│   ├── test_resources_cbv.asm
│   ├── test_resources_cbv.glsl
│   ├── test_resources_cbv_dynamic.asm
│   ├── test_resources_cbv_dynamic.glsl
│   ├── test_resources_cbv_indexed.asm
│   ├── test_resources_cbv_indexed.glsl
│   ├── test_resources_cbv_indexed_nonuniform.asm
│   ├── test_resources_cbv_indexed_nonuniform.glsl
│   ├── test_resources_srv_buffer_raw_load.asm
│   ├── test_resources_srv_buffer_raw_load.glsl
│   ├── test_resources_srv_buffer_raw_query.asm
│   ├── test_resources_srv_buffer_raw_query.glsl
│   ├── test_resources_srv_buffer_structured_load.asm
│   ├── test_resources_srv_buffer_structured_load.glsl
│   ├── test_resources_srv_buffer_structured_query.asm
│   ├── test_resources_srv_buffer_structured_query.glsl
│   ├── test_resources_srv_buffer_typed_load.asm
│   ├── test_resources_srv_buffer_typed_load.glsl
│   ├── test_resources_srv_buffer_typed_query.asm
│   ├── test_resources_srv_buffer_typed_query.glsl
│   ├── test_resources_srv_indexed_buffer_raw_load.asm
│   ├── test_resources_srv_indexed_buffer_raw_load.glsl
│   ├── test_resources_srv_indexed_buffer_raw_query.asm
│   ├── test_resources_srv_indexed_buffer_raw_query.glsl
│   ├── test_resources_srv_indexed_buffer_structured_load.asm
│   ├── test_resources_srv_indexed_buffer_structured_load.glsl
│   ├── test_resources_srv_indexed_buffer_structured_query.asm
│   ├── test_resources_srv_indexed_buffer_structured_query.glsl
│   ├── test_resources_srv_indexed_buffer_typed_load.asm
│   ├── test_resources_srv_indexed_buffer_typed_load.glsl
│   ├── test_resources_srv_indexed_buffer_typed_query.asm
│   ├── test_resources_srv_indexed_buffer_typed_query.glsl
│   ├── test_resources_uav_buffer_raw_atomic.asm
│   ├── test_resources_uav_buffer_raw_atomic.glsl
│   ├── test_resources_uav_buffer_raw_load.asm
│   ├── test_resources_uav_buffer_raw_load.glsl
│   ├── test_resources_uav_buffer_raw_load_precise.asm
│   ├── test_resources_uav_buffer_raw_load_precise.glsl
│   ├── test_resources_uav_buffer_raw_query.asm
│   ├── test_resources_uav_buffer_raw_query.glsl
│   ├── test_resources_uav_buffer_raw_store.asm
│   ├── test_resources_uav_buffer_raw_store.glsl
│   ├── test_resources_uav_buffer_structured_atomic.asm
│   ├── test_resources_uav_buffer_structured_atomic.glsl
│   ├── test_resources_uav_buffer_structured_load.asm
│   ├── test_resources_uav_buffer_structured_load.glsl
│   ├── test_resources_uav_buffer_structured_load_precise.asm
│   ├── test_resources_uav_buffer_structured_load_precise.glsl
│   ├── test_resources_uav_buffer_structured_query.asm
│   ├── test_resources_uav_buffer_structured_query.glsl
│   ├── test_resources_uav_buffer_structured_store.asm
│   ├── test_resources_uav_buffer_structured_store.glsl
│   ├── test_resources_uav_buffer_typed_atomic.asm
│   ├── test_resources_uav_buffer_typed_atomic.glsl
│   ├── test_resources_uav_buffer_typed_load.asm
│   ├── test_resources_uav_buffer_typed_load.glsl
│   ├── test_resources_uav_buffer_typed_load_precise.asm
│   ├── test_resources_uav_buffer_typed_load_precise.glsl
│   ├── test_resources_uav_buffer_typed_query.asm
│   ├── test_resources_uav_buffer_typed_query.glsl
│   ├── test_resources_uav_buffer_typed_store.asm
│   ├── test_resources_uav_buffer_typed_store.glsl
│   ├── test_resources_uav_indexed_buffer_raw_atomic.asm
│   ├── test_resources_uav_indexed_buffer_raw_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_raw_load.asm
│   ├── test_resources_uav_indexed_buffer_raw_load.glsl
│   ├── test_resources_uav_indexed_buffer_raw_query.asm
│   ├── test_resources_uav_indexed_buffer_raw_query.glsl
│   ├── test_resources_uav_indexed_buffer_raw_store.asm
│   ├── test_resources_uav_indexed_buffer_raw_store.glsl
│   ├── test_resources_uav_indexed_buffer_structured_atomic.asm
│   ├── test_resources_uav_indexed_buffer_structured_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_structured_load.asm
│   ├── test_resources_uav_indexed_buffer_structured_load.glsl
│   ├── test_resources_uav_indexed_buffer_structured_query.asm
│   ├── test_resources_uav_indexed_buffer_structured_query.glsl
│   ├── test_resources_uav_indexed_buffer_structured_store.asm
│   ├── test_resources_uav_indexed_buffer_structured_store.glsl
│   ├── test_resources_uav_indexed_buffer_typed_atomic.asm
│   ├── test_resources_uav_indexed_buffer_typed_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_typed_load.asm
│   ├── test_resources_uav_indexed_buffer_typed_load.glsl
│   ├── test_resources_uav_indexed_buffer_typed_query.asm
│   ├── test_resources_uav_indexed_buffer_typed_query.glsl
│   ├── test_resources_uav_indexed_buffer_typed_store.asm
│   └── test_resources_uav_indexed_buffer_typed_store.glsl
├── roundtrip_shaders.py
├── scratch_pool.hpp
├── shaders/
│   ├── ags/
│   │   ├── ags.ssbo.comp
│   │   ├── ags_shader_intrinsics_dx12.inc
│   │   ├── cs_constexpr_wmma_gep.sm66.full-wmma.ssbo.comp
│   │   ├── cs_constexpr_wmma_gep.sm66.ssbo.comp
│   │   ├── cs_wmma_alloca.sm66.ssbo.comp
│   │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.comp
│   │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_extract_insert.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_at.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_bt.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ct.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ot.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_fp8.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided_transpose.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f32.sm66.ssbo.comp
│   │   ├── cs_wmma_fp16_fp8_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_fp32_fp16_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_fp32_fp8_conversions.sm66.ssbo.nv-coopmat2.comp
│   │   ├── cs_wmma_fp8_fp32_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_lds_transpose.sm66.ssbo.comp
│   │   ├── cs_wmma_matrix_length.sm66.ssbo.comp
│   │   ├── cs_wmma_store_phi.full-wmma.sm66.ssbo.comp
│   │   ├── cs_wmma_store_phi.sm66.ssbo.comp
│   │   └── wmma_ags.h
│   ├── alloca-opts/
│   │   ├── bad-stride.frag
│   │   ├── double-array-load.frag
│   │   ├── float4-array-load.bindless.frag
│   │   ├── float4-array-load.bindless.root-constants.frag
│   │   ├── float4-array-load.frag
│   │   ├── float4-array-load.root-constant.frag
│   │   ├── float4-array-load.root-descriptor.frag
│   │   ├── float4-array-load.root-descriptor.root-constants.frag
│   │   ├── load-different.frag
│   │   ├── local-root-constants.local-root-signature.rgen
│   │   ├── matrix-load.frag
│   │   ├── missing-first.frag
│   │   ├── missing-last-element.frag
│   │   ├── out-of-order-load.frag
│   │   ├── store-after-load.frag
│   │   └── uint4-array-load.frag
│   ├── asm/
│   │   ├── bfi.bc.dxil
│   │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
│   │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
│   │   ├── constant-struct-aggregate.bc.dxil
│   │   ├── control-flow-multi-break-with-non-idom-loop-header.dxil
│   │   ├── ibfe.bc.dxil
│   │   └── ubfe.bc.dxil
│   ├── auto-barrier/
│   │   ├── complex-loop.auto-group-shared-barrier.comp
│   │   ├── inner-to-inner.auto-group-shared-barrier.comp
│   │   ├── inner-to-outer.auto-group-shared-barrier.comp
│   │   ├── outer-to-inner.auto-group-shared-barrier.comp
│   │   ├── single-block-loop.auto-group-shared-barrier.comp
│   │   └── single-block.auto-group-shared-barrier.comp
│   ├── control-flow/
│   │   ├── branch-return-2.comp
│   │   ├── branch-return.comp
│   │   ├── branch.comp
│   │   ├── conditional-break-into-if-else-if-ladder-2.comp
│   │   ├── conditional-break-into-if-else-if-ladder.comp
│   │   ├── dual-inner-loop-early-return.comp
│   │   ├── if-else-if-into-continue.comp
│   │   ├── inner-loop-early-return.comp
│   │   ├── interleaved-unrolled-loop-breaks.comp
│   │   ├── loop-break-2.comp
│   │   ├── loop-break.comp
│   │   ├── loop-continue-2.comp
│   │   ├── loop-continue-3.comp
│   │   ├── loop-continue.comp
│   │   ├── loop-inside-infinite-loop-2.frag
│   │   ├── loop-inside-infinite-loop.frag
│   │   ├── loop-return.comp
│   │   ├── loop.comp
│   │   ├── nested-loop-break-2.comp
│   │   ├── nested-loop-break.comp
│   │   ├── nested-loop.comp
│   │   ├── selection-merge-split-post-domination.frag
│   │   ├── switch-continue.frag
│   │   ├── switch-merge-into-other-merge.comp
│   │   ├── switch-shared-header-with-loop.comp
│   │   └── wave-size-dependent-loop-unroll.comp
│   ├── descriptor_qa/
│   │   ├── acceleration-structure.bindless.descriptor-qa.rgen
│   │   ├── acceleration-structure.bindless.descriptor-qa.sm66.rgen
│   │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.descriptor-qa.rgen
│   │   ├── descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp
│   │   ├── descriptor_qa.bindless.descriptor-qa.comp
│   │   ├── descriptor_qa.bindless.descriptor-qa.sm66.comp
│   │   ├── descriptor_qa.bindless.ssbo.descriptor-qa.comp
│   │   ├── early-2.bindless.descriptor-qa.frag
│   │   ├── early-3.bindless.descriptor-qa.frag
│   │   ├── early-4.bindless.descriptor-qa.frag
│   │   ├── early-5.bindless.descriptor-qa.frag
│   │   ├── early-heap.descriptor-qa.sm66.frag
│   │   └── early.bindless.descriptor-qa.frag
│   ├── dxil-builtin/
│   │   ├── accept-hit-and-end-search-ignore-hit.rany
│   │   ├── acos.frag
│   │   ├── asin.frag
│   │   ├── atan.frag
│   │   ├── atomic-bin-op.bindless.root-constant.frag
│   │   ├── atomic-bin-op.frag
│   │   ├── atomic-bin-op.root-descriptor.comp
│   │   ├── atomic-bin-op.ssbo.frag
│   │   ├── atomic-compare-exchange.frag
│   │   ├── atomic-compare-exchange.root-descriptor.comp
│   │   ├── atomic-compare-exchange.ssbo.frag
│   │   ├── attributes.denorm-ftz.comp
│   │   ├── attributes.denorm-preserve.comp
│   │   ├── barrier.comp
│   │   ├── barycentrics-2.frag
│   │   ├── barycentrics.frag
│   │   ├── bfrev.frag
│   │   ├── bitcount-bitrev-sizes.ssbo.comp
│   │   ├── buffer-load-feedback.frag
│   │   ├── buffer-load-signed-feedback.frag
│   │   ├── buffer-load-signed.frag
│   │   ├── buffer-load.frag
│   │   ├── buffer-load.ssbo.frag
│   │   ├── buffer-store-signed.frag
│   │   ├── buffer-store.frag
│   │   ├── buffer-store.ssbo.frag
│   │   ├── buffer-update-counter.frag
│   │   ├── calculate-lod.frag
│   │   ├── call-shader.rgen
│   │   ├── clip.demote-to-helper.frag
│   │   ├── clip.frag
│   │   ├── compute-shader-derivatives-cube-array.noderivs.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives-cube.noderivs.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives-single-thread.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives.noderivs.sm66.ssbo.comp
│   │   ├── cos.frag
│   │   ├── countbits.frag
│   │   ├── coverage.frag
│   │   ├── derivative.frag
│   │   ├── derivative.sm60.frag
│   │   ├── derivative.sm60.native-fp16.frag
│   │   ├── derivatives.sm66.comp
│   │   ├── discard.demote-to-helper.frag
│   │   ├── discard.frag
│   │   ├── dispatch-rays-dimensions.rgen
│   │   ├── dispatch-rays-index.rgen
│   │   ├── dot2.frag
│   │   ├── dot3.frag
│   │   ├── dot4.frag
│   │   ├── eval-centroid.frag
│   │   ├── eval-sample-index.frag
│   │   ├── eval-snapped.frag
│   │   ├── exp.frag
│   │   ├── f16-to-f32.frag
│   │   ├── f32-to-f16.frag
│   │   ├── fabs.frag
│   │   ├── firstbithi-16.sm62.frag
│   │   ├── firstbithi-64.frag
│   │   ├── firstbithi.frag
│   │   ├── firstbitlo-16.sm62.frag
│   │   ├── firstbitlo-64.frag
│   │   ├── firstbitlo.frag
│   │   ├── firstbitshi-16.sm62.frag
│   │   ├── firstbitshi-64.frag
│   │   ├── firstbitshi.frag
│   │   ├── flattened_thread_id_in_group.comp
│   │   ├── fma.frag
│   │   ├── fmad-precise.frag
│   │   ├── fmad.frag
│   │   ├── fmax.frag
│   │   ├── fmin.frag
│   │   ├── frc.frag
│   │   ├── get-dimensions-w-only.frag
│   │   ├── get-dimensions-xyz-only.frag
│   │   ├── get-dimensions.bindless.root-constant.frag
│   │   ├── get-dimensions.bindless.root-constant.ssbo.frag
│   │   ├── get-dimensions.frag
│   │   ├── get-dimensions.ssbo.frag
│   │   ├── group_id.comp
│   │   ├── hcos.frag
│   │   ├── hsin.frag
│   │   ├── htan.frag
│   │   ├── imad.frag
│   │   ├── imax.frag
│   │   ├── imin.frag
│   │   ├── instance-id.vert
│   │   ├── is-helper-lane-2.demote-to-helper.sm66.frag
│   │   ├── is-helper-lane-2.sm66.frag
│   │   ├── is-helper-lane.demote-to-helper.sm66.frag
│   │   ├── is-helper-lane.sm66.frag
│   │   ├── isfinite.frag
│   │   ├── isinf.frag
│   │   ├── isnan.frag
│   │   ├── log.frag
│   │   ├── make-double.frag
│   │   ├── msaa-uav.sm67.comp
│   │   ├── msad.comp
│   │   ├── object-ray-direction.rany
│   │   ├── object-ray-origin.rany
│   │   ├── object-to-world-3x4.rany
│   │   ├── object-to-world-4x3.rany
│   │   ├── pack-unpack.ssbo.sm66.comp
│   │   ├── quad-all-any.sm67.comp
│   │   ├── quad-all-any.sm67.quad-maximal-reconvergence.noglsl.comp
│   │   ├── quad-read-at-2d.comp
│   │   ├── quad-read-at-2d.sm66.comp
│   │   ├── quad-read-at.comp
│   │   ├── quad-read-at.frag
│   │   ├── quad-swap.comp
│   │   ├── quad-swap.frag
│   │   ├── raw-gather-offset-sparse.sm67.ssbo.comp
│   │   ├── raw-gather-offset.sm67.ssbo.comp
│   │   ├── raw-gather-sparse.sm67.ssbo.comp
│   │   ├── raw-gather.sm67.ssbo.comp
│   │   ├── ray-query-phi-multi.invalid.sm66.comp
│   │   ├── ray-query-phi-simple.sm66.comp
│   │   ├── ray-query-select-multi.invalid.sm66.comp
│   │   ├── ray-query-select-simple.sm66.comp
│   │   ├── ray-query-store-multi.invalid.sm66.comp
│   │   ├── ray-query-store-simple.sm66.comp
│   │   ├── ray-query.comp
│   │   ├── ray-t-current.rany
│   │   ├── ray-t-min.rany
│   │   ├── render-target-sample-count.frag
│   │   ├── render-target-sample-position.frag
│   │   ├── report-hit.rint
│   │   ├── round-ne.frag
│   │   ├── round-ni.frag
│   │   ├── round-pi.frag
│   │   ├── round-z.frag
│   │   ├── rsqrt.frag
│   │   ├── rt-geometry-index.rany
│   │   ├── rt-hit-kind.rany
│   │   ├── rt-instance-id.rany
│   │   ├── rt-instance-index.rany
│   │   ├── rt-primitive-index.rany
│   │   ├── rt-ray-flags.rany
│   │   ├── sample-bias-feedback.frag
│   │   ├── sample-bias-offset.frag
│   │   ├── sample-bias.frag
│   │   ├── sample-cmp-bias-feedback.frag
│   │   ├── sample-cmp-bias-offset.frag
│   │   ├── sample-cmp-bias.frag
│   │   ├── sample-cmp-feedback.frag
│   │   ├── sample-cmp-grad-offset-feedback.frag
│   │   ├── sample-cmp-grad-offset.frag
│   │   ├── sample-cmp-grad.frag
│   │   ├── sample-cmp-level.sm67.noglsl.frag
│   │   ├── sample-cmp-levelzero.frag
│   │   ├── sample-cmp-offset-levelzero-feedback.frag
│   │   ├── sample-cmp-offset-levelzero.frag
│   │   ├── sample-cmp-offset.frag
│   │   ├── sample-cmp.frag
│   │   ├── sample-grad-offset-dynamic.noglsl.invalid.sm67.frag
│   │   ├── sample-grad-offset-feedback.frag
│   │   ├── sample-grad-offset.frag
│   │   ├── sample-grad.frag
│   │   ├── sample-id.frag
│   │   ├── 
sample-level-offset-feedback.frag │ │ ├── sample-level-offset.frag │ │ ├── sample-level.frag │ │ ├── sample-offset-dynamic.noglsl.invalid.sm67.frag │ │ ├── sample-offset.frag │ │ ├── sample.frag │ │ ├── saturate.frag │ │ ├── sin.frag │ │ ├── sm64-packed-arithmetic.ssbo.comp │ │ ├── sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp │ │ ├── sm64-packed-arithmetic.ssbo.mixed-float-dot-product.noglsl.comp │ │ ├── split-double.frag │ │ ├── sqrt.frag │ │ ├── tan.frag │ │ ├── texture-gather-4offset.frag │ │ ├── texture-gather-cmp-offset-feedback.frag │ │ ├── texture-gather-cmp-offset.frag │ │ ├── texture-gather-cmp.frag │ │ ├── texture-gather-offset.frag │ │ ├── texture-gather-signed-feedback.frag │ │ ├── texture-gather-signed.frag │ │ ├── texture-gather.frag │ │ ├── texture-load-feedback.frag │ │ ├── texture-load-offset-dynamic.sm67.frag │ │ ├── texture-load-offset.frag │ │ ├── texture-load-signed.frag │ │ ├── texture-load.frag │ │ ├── texture-store-signed.frag │ │ ├── texture-store.frag │ │ ├── texture2dms-sample-position.frag │ │ ├── thread_id.comp │ │ ├── thread_id_in_group.comp │ │ ├── trace-ray-flags-2.rgen │ │ ├── trace-ray-flags.rgen │ │ ├── trace-ray.rgen │ │ ├── umad.frag │ │ ├── umax.frag │ │ ├── umin.frag │ │ ├── vertex-id.vert │ │ ├── wave-active-all-true.comp │ │ ├── wave-active-all-true.frag │ │ ├── wave-active-any-true.comp │ │ ├── wave-active-any-true.frag │ │ ├── wave-active-ballot-discard.demote-to-helper.frag │ │ ├── wave-active-ballot-discard.frag │ │ ├── wave-active-ballot.comp │ │ ├── wave-active-ballot.demote-to-helper.frag │ │ ├── wave-active-ballot.frag │ │ ├── wave-active-count-bits.comp │ │ ├── wave-active-count-bits.frag │ │ ├── wave-all-equal.comp │ │ ├── wave-all-equal.frag │ │ ├── wave-get-lane-count.comp │ │ ├── wave-get-lane-index.comp │ │ ├── wave-is-first-lane.comp │ │ ├── wave-is-first-lane.frag │ │ ├── wave-match.comp │ │ ├── wave-match.frag │ │ ├── wave-match.partitioned.noglsl.comp │ │ ├── wave-match.partitioned.noglsl.frag │ │ ├── wave-multi-prefix-count-bits.comp │ │ ├── wave-multi-prefix-count-bits.frag │ │ ├── wave-multi-prefix-op.comp │ │ ├── wave-multi-prefix-op.frag │ │ ├── wave-multi-prefix-op.partitioned.noglsl.comp │ │ ├── wave-multi-prefix-op.partitioned.noglsl.frag │ │ ├── wave-prefix.comp │ │ ├── wave-prefix.frag │ │ ├── wave-read-lane-at-optimizations.comp │ │ ├── wave-read-lane-at.comp │ │ ├── wave-read-lane-first.comp │ │ ├── wave-read-lane-first.frag │ │ ├── wave-reduce-helpers.sm67.frag │ │ ├── wave-reduce-helpers.sm67.quad-maximal-reconvergence.frag │ │ ├── wave-reduce.comp │ │ ├── wave-reduce.frag │ │ ├── wave-size.sm66.comp │ │ ├── world-ray-direction.rany │ │ ├── world-ray-origin.rany │ │ ├── world-to-object-3x4.rany │ │ └── world-to-object-4x3.rany │ ├── fp16/ │ │ ├── saturate.frag │ │ ├── saturate.sm60.frag │ │ └── saturate.sm60.native-fp16.frag │ ├── heap-robustness/ │ │ ├── misc.bindless.heap-raw-va-cbv.sm66.ssbo.comp │ │ ├── misc.bindless.heap-robustness.heap-robustness-cbv.sm66.ssbo.comp │ │ ├── misc.bindless.heap-robustness.sm66.ssbo.comp │ │ ├── misc.bindless.sm66.ssbo.comp │ │ └── misc.heap-robustness.bindless.heap-robustness-cbv.heap-raw-va-cbv.sm66.ssbo.comp │ ├── instrumentation/ │ │ ├── atomics-raw.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-raw.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured-counter.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── 
atomics-typed.bindless.bda-instrumentation.ssbo.comp │ │ ├── cbv.bindless.bda-instrumentation.comp │ │ ├── cbv.root-descriptor.bda-instrumentation.comp │ │ ├── raw.bindless.bda-instrumentation.ssbo.comp │ │ ├── raw.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── structured.bindless.bda-instrumentation.comp │ │ ├── structured.bindless.bda-instrumentation.ssbo.comp │ │ ├── structured.root-descriptor.bda-instrumentation.ssbo.comp │ │ └── typed.bindless.bda-instrumentation.comp │ ├── llvm-builtin/ │ │ ├── alloca-robustness-cases.extended-robustness.vert │ │ ├── alloca.frag │ │ ├── atomic-bin-op.comp │ │ ├── atomic-compare-exchange.comp │ │ ├── atomic-compare-exchange.sm66.ssbo.comp │ │ ├── bool-to-fp.frag │ │ ├── constant-expression-cast.comp │ │ ├── constant-expression-gep.comp │ │ ├── fadd.frag │ │ ├── fast-mul-div-pair.comp │ │ ├── fcmp_eq.frag │ │ ├── fcmp_ge.frag │ │ ├── fcmp_gt.frag │ │ ├── fcmp_le.frag │ │ ├── fcmp_lt.frag │ │ ├── fcmp_ne.frag │ │ ├── fdiv.frag │ │ ├── fmul.frag │ │ ├── frem.frag │ │ ├── fsub.frag │ │ ├── glitched-integer-width.comp │ │ ├── groupshared.comp │ │ ├── icmp_eq.frag │ │ ├── icmp_ne.frag │ │ ├── icmp_sge.frag │ │ ├── icmp_sgt.frag │ │ ├── icmp_sle.frag │ │ ├── icmp_slt.frag │ │ ├── icmp_uge.frag │ │ ├── icmp_ugt.frag │ │ ├── icmp_ule.frag │ │ ├── icmp_ult.frag │ │ ├── logical-and.frag │ │ ├── logical-equal.frag │ │ ├── logical-not-equal.frag │ │ ├── logical-or.frag │ │ ├── lut.frag │ │ ├── min16-phi.sm60.comp │ │ ├── precise_math.frag │ │ └── zext-bool.frag │ ├── memory-model/ │ │ ├── uav-coherent-promotion.bindless.ssbo.comp │ │ ├── uav-coherent-promotion.root-descriptor.ssbo.comp │ │ ├── uav-coherent-promotion.sm66.bindless.ssbo.comp │ │ ├── uav-coherent-promotion.sm66.ssbo.comp │ │ ├── uav-coherent-promotion.ssbo.comp │ │ ├── uav-coherent.root-descriptor.ssbo.comp │ │ ├── uav-coherent.sm66.ssbo.comp │ │ └── uav-coherent.ssbo.comp │ ├── nvapi/ │ │ ├── bringup.nvapi.ssbo.rgen │ │ ├── get-special-global-timer.nvapi.ssbo.rgen │ │ ├── hit-object.local-root-signature.noglsl.nvapi.ssbo.rgen │ │ ├── nvHLSLExtns.h │ │ ├── nvHLSLExtnsInternal.h │ │ ├── nvShaderExtnEnums.h │ │ ├── ray-query-cluster-id.nvapi.comp │ │ ├── rt-cluster-id.nvapi.rany │ │ └── shuffle.nvapi.ssbo.comp │ ├── opts/ │ │ ├── fp16-fp32-fp16-1.ssbo.comp │ │ ├── sabs.frag │ │ ├── sneg.frag │ │ ├── wave-read-lane-first-heap.sm66.comp │ │ ├── wave-read-lane-first.bindless.local-root-signature.rmiss │ │ ├── wave-read-lane-first.comp │ │ ├── wave-read-lane-first.no-legacy-cbuf-layout.comp │ │ ├── wave-read-lane-first.no-legacy-cbuf-layout.sm60.comp │ │ ├── wave-read-lane-first.sm60.comp │ │ ├── wave-read-lane-first.sm66.comp │ │ ├── wave-read-lane-first.ssbo.comp │ │ ├── wave-read-lane-first.ssbo.rgen │ │ ├── wave-read-lane-first.ssbo.sm60.comp │ │ ├── wave-read-lane-first.ssbo.sm66.comp │ │ └── wave-read-lane-first.ssbo.sm66.rgen │ ├── raw-access/ │ │ ├── bab-double1.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double2.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double3.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double4.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float1.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float2.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float3.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float4.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float4x4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float2.raw-access-chains.noglsl.ssbo.comp │ │ ├── 
structured-float3.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float4x4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-half1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.comp │ │ ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp │ │ ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.comp │ │ ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp │ │ ├── structured-uint1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-uint2.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-uint3.raw-access-chains.noglsl.ssbo.comp │ │ └── structured-uint4.raw-access-chains.noglsl.ssbo.comp │ ├── resources/ │ │ ├── acceleration-structure.bindless.rgen │ │ ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.raw-va-stride-offset.rgen │ │ ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.rgen │ │ ├── acceleration-structure.local-root-signature.root-descriptor.rgen │ │ ├── basic.input-attachment.frag │ │ ├── buffer-16bit.ssbo.bindless.comp │ │ ├── buffer-16bit.ssbo.bindless.ssbo-align.comp │ │ ├── buffer-16bit.ssbo.comp │ │ ├── buffer-64bit.ssbo.bindless.ssbo-align.comp │ │ ├── buffer-64bit.ssbo.comp │ │ ├── buffer-alignment-fixup.bindless.root-constant.offset-layout.typed-buffer-offset.comp │ │ ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.comp │ │ ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.offset-layout.typed-buffer-offset.comp │ │ ├── cbuf.root-constant.min16float.sm60.frag │ │ ├── cbuf.root-constant.min16float.sm60.native-fp16.frag │ │ ├── cbuf.root-constant.min16int.sm60.frag │ │ ├── cbuf.root-constant.min16int.sm60.native-fp16.frag │ │ ├── cbv-array-nonuniform.frag │ │ ├── cbv-array.frag │ │ ├── cbv-dynamic.no-legacy-cbuf-layout.local-root-signature.rmiss │ │ ├── cbv-indexing.frag │ │ ├── cbv-indexing.sm66.frag │ │ ├── cbv-legacy-fp16-fp64.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag │ │ ├── cbv-legacy-fp16-fp64.sm60.frag │ │ ├── cbv-legacy-fp16-fp64.sm60.native-fp16.frag │ │ ├── cbv.bindless.root-constant.cbv-as-ssbo.frag │ │ ├── cbv.bindless.root-constant.frag │ │ ├── cbv.frag │ │ ├── cbv.no-legacy-cbuf-layout.bindless.frag │ │ ├── cbv.no-legacy-cbuf-layout.index-divider.frag │ │ ├── cbv.no-legacy-cbuf-layout.local-root-signature.rmiss │ │ ├── cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag │ │ ├── cbv.no-legacy-cbuf-layout.root-constant.frag │ │ ├── cbv.root-constant.frag │ │ ├── cbv.root-descriptor.no-legacy-cbuf-layout.frag │ │ ├── combined-image-sampler-reuse.frag │ │ ├── dynamic-root-constant.root-constant.bindless.root-descriptor.comp │ │ ├── min16-alloca-groupshared.sm60.comp │ │ ├── min16float-ssbo-dxr.ssbo.rgen │ │ ├── raw-buffer-addressing.comp │ │ ├── raw-buffer-addressing.ssbo.comp │ │ ├── root-bda.root-descriptor.comp │ │ ├── root-bda.root-descriptor.sm60.comp │ │ ├── root-constant-with-bda.root-descriptor.root-constant.comp │ │ ├── rt-resources.bindless.local-root-signature.rmiss │ │ ├── rt-resources.bindless.rmiss │ │ ├── rt-resources.rmiss │ │ ├── sampler-array.frag │ │ ├── sampler-indexing.frag │ │ ├── sampler-indexing.sm66.frag │ │ ├── sampler.bindless.root-constant.frag │ │ ├── sm66/ │ │ │ ├── atomics-64bit-groupshared.ssbo.sm66.comp │ │ │ ├── atomics-64bit.root-descriptor.sm66.comp 
│ │ │ ├── atomics-64bit.ssbo.sm66.comp │ │ │ ├── atomics-component-alias.sm66.comp │ │ │ ├── atomics-typed-64bit-heap.sm66.comp │ │ │ ├── atomics-typed-64bit.bindless.sm66.comp │ │ │ ├── atomics-typed-64bit.sm66.comp │ │ │ ├── binding-range-selection.bindless.sm66.comp │ │ │ ├── binding-range-selection.sm66.comp │ │ │ ├── buffer-64bit-double.ssbo.sm66.comp │ │ │ ├── buffer-64bit.ssbo.sm66.comp │ │ │ ├── buffer-64bit.ssbo.ssbo-align.sm66.comp │ │ │ ├── cbuffer-heap.sm66.frag │ │ │ ├── cbv.no-legacy-cbuf-layout.bindless.sm66.frag │ │ │ ├── cbv.no-legacy-cbuf-layout.sm66.frag │ │ │ ├── raw-buffer-heap.sm66.frag │ │ │ ├── raw-buffer-heap.ssbo.sm66.frag │ │ │ ├── raw-buffer-heap.typed-buffer-offset.sm66.frag │ │ │ ├── raw-buffers-binding.ssbo.bindless.sm66.frag │ │ │ ├── raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag │ │ │ ├── raygen-heap.sm66.rgen │ │ │ ├── raygen-heap.ssbo-rtas.raw-va-stride-offset.sm66.rgen │ │ │ ├── raygen-heap.ssbo-rtas.sm66.rgen │ │ │ ├── raygen.sm66.rgen │ │ │ ├── raygen.ssbo-rtas.bindless.raw-va-stride-offset.sm66.rgen │ │ │ ├── raygen.ssbo-rtas.bindless.sm66.rgen │ │ │ ├── rw-typed-binding.sm66.frag │ │ │ ├── rw-typed-heap.sm66.frag │ │ │ ├── sampled-types-binding.sm66.frag │ │ │ ├── sampled-types.sm66.frag │ │ │ ├── sampler-binding.sm66.frag │ │ │ ├── sampler-heap.sm66.frag │ │ │ ├── structured-16bit-heap.ssbo.sm66.frag │ │ │ ├── structured-16bit-heap.ssbo.ssbo-align.sm66.frag │ │ │ ├── structured-buffer-heap.sm66.frag │ │ │ ├── structured-buffer-heap.ssbo.sm66.frag │ │ │ ├── structured-buffer-heap.ssbo.ssbo-align.sm66.frag │ │ │ └── structured-buffer-heap.typed-buffer-offset.sm66.frag │ │ ├── srv-array-raw-buffer-nonuniform.frag │ │ ├── srv-array-raw-buffer.frag │ │ ├── srv-array-structured-buffer-nonuniform.frag │ │ ├── srv-array-structured-buffer.frag │ │ ├── srv-array-texture-nonuniform.frag │ │ ├── srv-array-texture.frag │ │ ├── srv-array-typed-buffer-nonuniform.frag │ │ ├── srv-array-typed-buffer.frag │ │ ├── srv-indexing.frag │ │ ├── srv-indexing.sm66.frag │ │ ├── srv-raw-buffer.bindless.root-constant.frag │ │ ├── srv-raw-buffer.bindless.root-constant.ssbo.frag │ │ ├── srv-raw-buffer.ssbo.frag │ │ ├── srv-structured-buffer.bindless.root-constant.frag │ │ ├── srv-structured-buffer.bindless.root-constant.ssbo.frag │ │ ├── srv-structured-buffer.ssbo.frag │ │ ├── srv-texture.bindless.root-constant.frag │ │ ├── srv-texture.bindless.root-constant.inline-ubo.frag │ │ ├── srv-typed-buffer.bindless.root-constant.frag │ │ ├── srv-uav-raw.typed-buffer-offset.comp │ │ ├── srv-uav.typed-buffer-offset.comp │ │ ├── ssbo-minprecision.sm60.native-fp16.frag │ │ ├── ssbo-minprecision.sm60.ssbo.frag │ │ ├── ssbo-minprecision.sm60.ssbo.native-fp16.frag │ │ ├── ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag │ │ ├── ssbo-minprecision.sm60.ssbo.root-descriptor.frag │ │ ├── subobject-parsing.rgen │ │ ├── typed-resources-16bit-sparse.frag │ │ ├── typed-resources-16bit.bindless.frag │ │ ├── typed-resources-16bit.frag │ │ ├── typed-resources-16bit.sm60.bindless.frag │ │ ├── typed-resources-16bit.sm60.frag │ │ ├── typed-resources-16bit.sm60.native-fp16.bindless.frag │ │ ├── typed-resources-16bit.sm60.native-fp16.frag │ │ ├── uav-array-raw-buffer-nonuniform.frag │ │ ├── uav-array-raw-buffer.frag │ │ ├── uav-array-structured-buffer-nonuniform.frag │ │ ├── uav-array-structured-buffer-nonuniform.ssbo.bindless.root-constant.frag │ │ ├── uav-array-structured-buffer-nonuniform.ssbo.frag │ │ ├── uav-array-structured-buffer.frag │ │ ├── uav-array-texture-nonuniform.frag │ │ 
├── uav-array-texture.frag │ │ ├── uav-array-typed-buffer-nonuniform.frag │ │ ├── uav-array-typed-buffer.frag │ │ ├── uav-counter-array.ssbo.frag │ │ ├── uav-counter-array.ssbo.sm66.frag │ │ ├── uav-counter-array.ssbo.sm66.uav-counter-ssbo.frag │ │ ├── uav-counter-array.ssbo.uav-counter-ssbo.frag │ │ ├── uav-counter-heap.sm66.bindless.ssbo.frag │ │ ├── uav-counter-heap.sm66.uav-counter-ssbo.bindless.ssbo.frag │ │ ├── uav-counter-heap.sm66.uav-counter-texel-buffer.bindless.ssbo.frag │ │ ├── uav-counter.bindless.nobda.root-constant.comp │ │ ├── uav-counter.bindless.nobda.root-constant.raw-va-stride-offset.comp │ │ ├── uav-counter.bindless.root-constant.comp │ │ ├── uav-counter.bindless.root-constant.raw-va-stride-offset.comp │ │ ├── uav-counter.bindless.root-constant.raw-va-stride-offset.heap-raw-va-cbv.comp │ │ ├── uav-counter.bindless.root-constant.uav-counter-ssbo.comp │ │ ├── uav-counter.ssbo.comp │ │ ├── uav-counter.ssbo.raw-va-stride-offset.comp │ │ ├── uav-counter.ssbo.uav-counter-ssbo.comp │ │ ├── uav-indexing.frag │ │ ├── uav-indexing.sm66.frag │ │ ├── uav-raw-buffer.bindless.root-constant.frag │ │ ├── uav-raw-buffer.ssbo.frag │ │ ├── uav-structured-buffer.bindless.root-constant.frag │ │ ├── uav-typed-buffer.bindless.root-constant.frag │ │ └── uav-typed.typed-uav-without-format.comp │ ├── rov/ │ │ ├── rov-bab.bindless.frag │ │ ├── rov-bab.frag │ │ ├── rov-bab.ssbo.bindless.frag │ │ ├── rov-bab.ssbo.frag │ │ ├── rov-bab.ssbo.root-descriptor.frag │ │ ├── rov-branch-early-return.frag │ │ ├── rov-branch.frag │ │ ├── rov-buffer.frag │ │ ├── rov-inloop-2.frag │ │ ├── rov-inloop.frag │ │ ├── rov-per-sample.sm66.frag │ │ ├── rov-postloop.frag │ │ ├── rov-structured.bindless.frag │ │ ├── rov-structured.frag │ │ ├── rov-structured.ssbo.bindless.frag │ │ ├── rov-structured.ssbo.frag │ │ ├── rov-structured.ssbo.root-descriptor.frag │ │ ├── rov-tex1d.bindless.frag │ │ ├── rov-tex1d.frag │ │ ├── rov-tex1darray.bindless.frag │ │ ├── rov-tex1darray.frag │ │ ├── rov-tex2d.bindless.frag │ │ ├── rov-tex2d.frag │ │ ├── rov-tex2darray.bindless.frag │ │ ├── rov-tex2darray.frag │ │ ├── rov-tex3d.bindless.frag │ │ ├── rov-tex3d.frag │ │ ├── rov-undef.frag │ │ └── rov.sm66.frag │ ├── sampler-feedback/ │ │ ├── sampler-feedback.frag │ │ └── sampler-feedback.sm66.frag │ ├── semantics/ │ │ ├── clip-cull-distance.vert │ │ ├── clip-cull.frag │ │ ├── clip-distance-cols.frag │ │ ├── clip-distance-cols.vert │ │ ├── clip-distance-flatten.frag │ │ ├── clip-distance-flatten.vert │ │ ├── clip-distance-rows.frag │ │ ├── clip-distance-rows.vert │ │ ├── clip-distance-single.vert │ │ ├── coverage.frag │ │ ├── depth-greater-equal.frag │ │ ├── depth-less-equal.frag │ │ ├── depth.frag │ │ ├── early-depth-stencil.frag │ │ ├── inner-coverage.noglsl.frag │ │ ├── is-front-face.frag │ │ ├── position.frag │ │ ├── primitive-id.frag │ │ ├── primitive-id.geom │ │ ├── render-target-array-index.frag │ │ ├── render-target-array-index.geom │ │ ├── sample-rate-pos.frag │ │ ├── stencil-ref.frag │ │ ├── sv-shading-rate.noglsl.frag │ │ ├── sv-shading-rate.noglsl.vert │ │ ├── view-id.frag │ │ ├── view-id.vert │ │ ├── viewport-array-index.frag │ │ └── viewport-array-index.geom │ ├── stages/ │ │ ├── boolean-io.vert │ │ ├── callable-chain.rcall │ │ ├── callable.rcall │ │ ├── closesthit.rclosest │ │ ├── domain-clip-cull.tese │ │ ├── domain-patch-input-integer-io.tese │ │ ├── domain.tese │ │ ├── extra_output.dual-source-blending.frag │ │ ├── extra_output_reordered.dual-source-blending.frag │ │ ├── geometry-clip-cull.geom │ │ ├── 
geometry-input-line.geom │ │ ├── geometry-input-lineadj.geom │ │ ├── geometry-input-point.geom │ │ ├── geometry-input-triangle.geom │ │ ├── geometry-input-triangleadj.geom │ │ ├── geometry-instancing.geom │ │ ├── geometry-output-line.geom │ │ ├── geometry-output-point.geom │ │ ├── geometry-streams.geom │ │ ├── hull-arrays.tesc │ │ ├── hull-clip-cull.tesc │ │ ├── hull-patch-output-integer-io.tesc │ │ ├── hull-single-cp.tesc │ │ ├── hull.tesc │ │ ├── mesh-basic-line.mesh │ │ ├── mesh-basic.mesh │ │ ├── mesh-clip-cull.mesh │ │ ├── raygen-complex-storage-class.rgen │ │ ├── raygen-skip-inactive-resources.rgen │ │ ├── raygen.rgen │ │ ├── raymiss-chain.rmiss │ │ ├── raymiss.rmiss │ │ ├── simple.dual-source-blending.frag │ │ ├── simple.invariant.vert │ │ ├── stage-input-output.16bit-io.frag │ │ ├── stage-input-output.frag │ │ ├── stream-out.stream-out.vert │ │ ├── swizzle.rt-swizzle.frag │ │ ├── task-basic.task │ │ ├── vertex-array-input.vert │ │ ├── vertex-array-output.vert │ │ └── vertex-input-remapping.vert │ ├── vectorization/ │ │ ├── copy-byte-address.ssbo.comp │ │ ├── copy-composite-2.ssbo.comp │ │ ├── copy-composite.ssbo.comp │ │ ├── copy-composite.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double2.ssbo.comp │ │ ├── copy-double2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double3.ssbo.comp │ │ ├── copy-double3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double4.ssbo.comp │ │ ├── copy-float2.ssbo.comp │ │ ├── copy-float2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-float2x2.ssbo.comp │ │ ├── copy-float3.ssbo.comp │ │ ├── copy-float3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-float4x4.ssbo.comp │ │ ├── copy-half2.ssbo.comp │ │ ├── copy-half2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-half3.ssbo.comp │ │ ├── copy-half3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-half4.ssbo.comp │ │ └── copy-half4.ssbo.ssbo-align.bindless.comp │ ├── view-instancing/ │ │ ├── geom/ │ │ │ ├── basic.view-instancing.last-pre-raster.geom │ │ │ ├── basic.view-instancing.last-pre-raster.view-instance-mask.geom │ │ │ ├── basic.view-instancing.view-instancing-multiview.last-pre-raster.geom │ │ │ ├── basic.view-instancing.view-instancing-multiview.view-instancing-viewport-offset.last-pre-raster.geom │ │ │ └── basic.view-instancing.view-instancing-viewport-offset.last-pre-raster.geom │ │ ├── mesh/ │ │ │ ├── basic-export-viewport-layer.view-instancing.last-pre-raster.mesh │ │ │ ├── basic-export-viewport-layer.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh │ │ │ ├── basic-few-thread.view-instancing.last-pre-raster.mesh │ │ │ ├── basic-many-thread.view-instancing.last-pre-raster.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.view-instance-mask.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh │ │ │ └── basic.view-instancing.mesh │ │ ├── tesc/ │ │ │ ├── basic.view-instancing.tesc │ │ │ └── basic.view-instancing.view-instancing-multiview.tesc │ │ ├── tese/ │ │ │ ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.last-pre-raster.tese │ │ │ ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.tese │ │ │ ├── domain.view-instancing.last-pre-raster.tese │ │ │ ├── domain.view-instancing.tese │ │ │ ├── domain.view-instancing.view-instancing-multiview.last-pre-raster.tese │ │ │ ├── domain.view-instancing.view-instancing-multiview.tese │ │ │ ├── domain.view-instancing.view-instancing-viewport-offset.last-pre-raster.tese │ │ │ └── 
domain.view-instancing.view-instancing-viewport-offset.tese │ │ └── vert/ │ │ ├── basic.view-instancing.export-layer-viewport.last-pre-raster.vert │ │ ├── basic.view-instancing.export-layer-viewport.vert │ │ ├── basic.view-instancing.export-layer-viewport.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-layer-viewport.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.export-layer.last-pre-raster.vert │ │ ├── basic.view-instancing.export-layer.vert │ │ ├── basic.view-instancing.export-layer.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-layer.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.export-viewport.last-pre-raster.vert │ │ ├── basic.view-instancing.export-viewport.vert │ │ ├── basic.view-instancing.export-viewport.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-viewport.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.last-pre-raster.vert │ │ ├── basic.view-instancing.vert │ │ ├── basic.view-instancing.view-instance-mask.last-pre-raster.vert │ │ ├── basic.view-instancing.view-instance-mask.vert │ │ ├── basic.view-instancing.view-instancing-multiview.vert │ │ └── basic.view-instancing.view-instancing-viewport-offset.vert │ └── vkmm/ │ ├── coopmat.sm66.ssbo.vkmm.comp │ ├── cross_group_sharing.vkmm.node.inline-ubo.comp │ ├── descriptor_qa.bindless.descriptor-qa.vkmm.comp │ ├── groupshared.vkmm.comp │ ├── hull.vkmm.tesc │ ├── image-load-store.vkmm.comp │ ├── image-load-store.vkmm.sm66.comp │ ├── memory-model/ │ │ ├── uav-coherent-promotion.bindless.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.root-descriptor.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.sm66.bindless.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.sm66.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.ssbo.vkmm.comp │ │ ├── uav-coherent.root-descriptor.ssbo.vkmm.comp │ │ ├── uav-coherent.sm66.ssbo.vkmm.comp │ │ └── uav-coherent.ssbo.vkmm.comp │ ├── report-hit.vkmm.rint │ ├── rov-structured.vkmm.frag │ ├── rov-tex2d.vkmm.frag │ └── wmma_ags.h ├── show_graph.py ├── spirv_module.cpp ├── spirv_module.hpp ├── spirv_module_instrumentation.cpp ├── spirv_module_instrumentation.hpp ├── test_shaders.py ├── third_party/ │ ├── CMakeLists.txt │ ├── bc-decoder/ │ │ ├── llvm_bitreader.h │ │ ├── llvm_decoder.cpp │ │ └── llvm_decoder.h │ ├── cli_parser/ │ │ ├── cli_parser.cpp │ │ └── cli_parser.hpp │ └── glslang-spirv/ │ ├── InReadableOrder.cpp │ ├── Logger.cpp │ ├── Logger.h │ ├── SpvBuilder.cpp │ ├── SpvBuilder.h │ └── spvIR.h └── util/ ├── thread_local_allocator.cpp └── thread_local_allocator.hpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ # The style used for all options not specifically set in the configuration. BasedOnStyle: LLVM # The extra indent or outdent of access modifiers, e.g. public:. AccessModifierOffset: -4 # If true, aligns escaped newlines as far left as possible. Otherwise puts them into the right-most column. AlignEscapedNewlinesLeft: true # If true, aligns trailing comments. AlignTrailingComments: false # Allow putting all parameters of a function declaration onto the next line even if BinPackParameters is false. AllowAllParametersOfDeclarationOnNextLine: false # Allows contracting simple braced statements to a single line. AllowShortBlocksOnASingleLine: false # If true, short case labels will be contracted to a single line. 
AllowShortCaseLabelsOnASingleLine: false # Dependent on the value, int f() { return 0; } can be put on a single line. Possible values: None, Inline, All. AllowShortFunctionsOnASingleLine: None # If true, if (a) return; can be put on a single line. AllowShortIfStatementsOnASingleLine: false # If true, while (true) continue; can be put on a single line. AllowShortLoopsOnASingleLine: false # If true, always break after function definition return types. AlwaysBreakAfterDefinitionReturnType: false # If true, always break before multiline string literals. AlwaysBreakBeforeMultilineStrings: false # If true, always break after the template<...> of a template declaration. AlwaysBreakTemplateDeclarations: true # If false, a function call's arguments will either be all on the same line or will have one line each. BinPackArguments: true # If false, a function declaration's or function definition's parameters will either all be on the same line # or will have one line each. BinPackParameters: true # The way to wrap binary operators. Possible values: None, NonAssignment, All. BreakBeforeBinaryOperators: None # The brace breaking style to use. Possible values: Attach, Linux, Stroustrup, Allman, GNU. BreakBeforeBraces: Allman # If true, ternary operators will be placed after line breaks. BreakBeforeTernaryOperators: false # Always break constructor initializers before commas and align the commas with the colon. BreakConstructorInitializersBeforeComma: true # The column limit. A column limit of 0 means that there is no column limit. ColumnLimit: 120 # A regular expression that describes comments with special meaning, which should not be split into lines or otherwise changed. CommentPragmas: '^ *' # If the constructor initializers don't fit on a line, put each initializer on its own line. ConstructorInitializerAllOnOneLineOrOnePerLine: false # The number of characters to use for indentation of constructor initializer lists. ConstructorInitializerIndentWidth: 4 # Indent width for line continuations. ContinuationIndentWidth: 4 # If true, format braced lists as best suited for C++11 braced lists. Cpp11BracedListStyle: false # Disables formatting at all. DisableFormat: false # A vector of macros that should be interpreted as foreach loops instead of as function calls. #ForEachMacros: '' # Indent case labels one level from the switch statement. # When false, use the same indentation level as for the switch statement. # Switch statement body is always indented one level more than case labels. IndentCaseLabels: false # The number of columns to use for indentation. IndentWidth: 4 # Indent if a function definition or declaration is wrapped after the type. IndentWrappedFunctionNames: false # If true, empty lines at the start of blocks are kept. KeepEmptyLinesAtTheStartOfBlocks: true # Language, this format style is targeted at. Possible values: None, Cpp, Java, JavaScript, Proto. Language: Cpp # The maximum number of consecutive empty lines to keep. MaxEmptyLinesToKeep: 1 # The indentation used for namespaces. Possible values: None, Inner, All. NamespaceIndentation: None # The penalty for breaking a function call after "call(". PenaltyBreakBeforeFirstCallParameter: 19 # The penalty for each line break introduced inside a comment. PenaltyBreakComment: 300 # The penalty for breaking before the first <<. PenaltyBreakFirstLessLess: 120 # The penalty for each line break introduced inside a string literal. PenaltyBreakString: 1000 # The penalty for each character outside of the column limit. 
PenaltyExcessCharacter: 1000000 # Penalty for putting the return type of a function onto its own line. PenaltyReturnTypeOnItsOwnLine: 1000000000 # Pointer and reference alignment style. Possible values: Left, Right, Middle. PointerAlignment: Right # If true, a space may be inserted after C style casts. SpaceAfterCStyleCast: false # If false, spaces will be removed before assignment operators. SpaceBeforeAssignmentOperators: true # Defines in which cases to put a space before opening parentheses. Possible values: Never, ControlStatements, Always. SpaceBeforeParens: ControlStatements # If true, spaces may be inserted into '()'. SpaceInEmptyParentheses: false # The number of spaces before trailing line comments (// - comments). SpacesBeforeTrailingComments: 1 # If true, spaces will be inserted after '<' and before '>' in template argument lists. SpacesInAngles: false # If true, spaces may be inserted into C style casts. SpacesInCStyleCastParentheses: false # If true, spaces are inserted inside container literals (e.g. ObjC and Javascript array and dict literals). SpacesInContainerLiterals: false # If true, spaces will be inserted after '(' and before ')'. SpacesInParentheses: false # If true, spaces will be inserted after '[' and before ']'. SpacesInSquareBrackets: false # Format compatible with this standard, e.g. use A<A<int> > instead of A<A<int>> for LS_Cpp03. Possible values: Cpp03, Cpp11, Auto. Standard: Cpp11 # The number of columns used for tab stops. TabWidth: 4 # The way to use tab characters in the resulting file. Possible values: Never, ForIndentation, Always. UseTab: ForIndentation # Do not reflow comments ReflowComments: false ================================================ FILE: .gitattributes ================================================ shaders/**/* linguist-language=GLSL reference/**/* linguist-generated ================================================ FILE: .gitignore ================================================ /cmake-build-debug /cmake-build-release *.iml /.idea /.vs /.vscode /external/dxc* /external/DirectXShaderCompiler /external/llvm /shaders-dxil /shaders-dxbc /reference/shaders-dxil /reference/shaders-dxbc /build /out/build ================================================ FILE: .gitmodules ================================================ [submodule "third_party/spirv-headers"] path = third_party/spirv-headers url = https://github.com/KhronosGroup/SPIRV-Headers [submodule "third_party/SPIRV-Tools"] path = third_party/SPIRV-Tools url = https://github.com/KhronosGroup/SPIRV-Tools [submodule "third_party/SPIRV-Cross"] path = third_party/SPIRV-Cross url = https://github.com/KhronosGroup/SPIRV-Cross [submodule "third_party/dxbc-spirv"] path = subprojects/dxbc-spirv url = https://github.com/doitsujin/dxbc-spirv ================================================ FILE: CMakeLists.txt ================================================ # # Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation # # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial
portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. cmake_minimum_required(VERSION 3.10) set(CMAKE_CXX_STANDARD 14) set(CMAKE_C_STANDARD 99) project(dxil-spirv LANGUAGES CXX C) add_library(dxil-debug STATIC debug/logging.hpp debug/logging.cpp) target_include_directories(dxil-debug PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/debug) set_target_properties(dxil-debug PROPERTIES POSITION_INDEPENDENT_CODE ON) option(DXIL_SPIRV_CLI "Enable CLI support." ON) option(DXIL_SPIRV_NATIVE_LLVM "Enable native LLVM support." OFF) include(GNUInstallDirs) if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) set(DXIL_SPV_CXX_FLAGS -Wall -Wextra -Wno-missing-field-initializers -Wno-empty-body -Wno-unused-parameter -fno-exceptions -fno-rtti -fvisibility=hidden) elseif (MSVC) set(DXIL_SPV_CXX_FLAGS /D_CRT_SECURE_NO_WARNINGS /wd4996 /wd4244 /wd4267 /wd4244 /wd4309 /wd4005 /MP /DNOMINMAX) endif() add_library(dxil-utils STATIC util/thread_local_allocator.hpp util/thread_local_allocator.cpp) target_include_directories(dxil-utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/util) target_compile_options(dxil-utils PRIVATE ${DXIL_SPV_CXX_FLAGS}) set_target_properties(dxil-utils PROPERTIES POSITION_INDEPENDENT_CODE ON) add_subdirectory(third_party EXCLUDE_FROM_ALL) add_subdirectory(bc EXCLUDE_FROM_ALL) add_subdirectory(external EXCLUDE_FROM_ALL) add_library(spirv-module STATIC ir.hpp descriptor_qa.cpp descriptor_qa.hpp spirv_module.hpp spirv_module.cpp spirv_module_instrumentation.hpp spirv_module_instrumentation.cpp) set_target_properties(spirv-module PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(spirv-module PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(spirv-module PUBLIC glslang-spirv-builder dxil-spirv-headers) target_link_libraries(spirv-module PRIVATE dxil-utils dxil-debug) target_compile_options(spirv-module PRIVATE ${DXIL_SPV_CXX_FLAGS}) add_library(dxil-converter STATIC memory_stream.hpp memory_stream.cpp llvm_bitcode_parser.hpp llvm_bitcode_parser.cpp dxil.hpp dxil_converter.hpp dxil_converter.cpp cfg_structurizer.hpp cfg_structurizer.cpp node_pool.hpp node_pool.cpp node.hpp node.cpp dxil_parser.hpp dxil_parser.cpp scratch_pool.hpp opcodes/converter_impl.hpp opcodes/opcodes.hpp opcodes/dxil/dxil_common.hpp opcodes/dxil/dxil_common.cpp opcodes/dxil/dxil_resources.hpp opcodes/dxil/dxil_resources.cpp opcodes/dxil/dxil_compute.hpp opcodes/dxil/dxil_compute.cpp opcodes/dxil/dxil_arithmetic.hpp opcodes/dxil/dxil_arithmetic.cpp opcodes/dxil/dxil_pixel_ops.hpp opcodes/dxil/dxil_pixel_ops.cpp opcodes/dxil/dxil_geometry.hpp opcodes/dxil/dxil_geometry.cpp opcodes/dxil/dxil_tessellation.hpp opcodes/dxil/dxil_tessellation.cpp opcodes/dxil/dxil_waveops.hpp opcodes/dxil/dxil_waveops.cpp opcodes/dxil/dxil_sampling.hpp opcodes/dxil/dxil_sampling.cpp opcodes/dxil/dxil_buffer.hpp opcodes/dxil/dxil_buffer.cpp opcodes/dxil/dxil_ray_tracing.hpp opcodes/dxil/dxil_ray_tracing.cpp opcodes/dxil/dxil_mesh.hpp opcodes/dxil/dxil_mesh.cpp opcodes/dxil/dxil_workgraph.hpp opcodes/dxil/dxil_workgraph.cpp opcodes/dxil/dxil_ags.hpp opcodes/dxil/dxil_ags.cpp 
opcodes/dxil/dxil_nvapi.hpp opcodes/dxil/dxil_nvapi.cpp opcodes/opcodes_llvm_builtins.hpp opcodes/opcodes_llvm_builtins.cpp opcodes/opcodes_dxil_builtins.hpp opcodes/opcodes_dxil_builtins.cpp) set_target_properties(dxil-converter PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(dxil-converter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_options(dxil-converter PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_link_libraries(dxil-converter PRIVATE dxil-debug external::llvm dxil-utils) target_link_libraries(dxil-converter PUBLIC spirv-module) add_library(dxil-spirv-c-shared SHARED dxil_spirv_c.h dxil_spirv_c.cpp) target_include_directories(dxil-spirv-c-shared PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv>) target_link_libraries(dxil-spirv-c-shared PRIVATE dxil-debug dxil-converter external::llvm dxil-utils) target_compile_options(dxil-spirv-c-shared PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_compile_definitions(dxil-spirv-c-shared PRIVATE DXIL_SPV_EXPORT_SYMBOLS) set_target_properties(dxil-spirv-c-shared PROPERTIES PUBLIC_HEADER dxil_spirv_c.h) if (WIN32 AND CMAKE_COMPILER_IS_GNUCXX) target_link_libraries(dxil-spirv-c-shared PRIVATE -static gcc stdc++ winpthread) endif() # If we're linking in full LLVM statically, ensure we don't export all LLVM symbols. if (NOT MSVC AND DXIL_SPIRV_NATIVE_LLVM) set_target_properties(dxil-spirv-c-shared PROPERTIES LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/link.T") endif() add_library(dxil-spirv-c-static STATIC dxil_spirv_c.h dxil_spirv_c.cpp) target_include_directories(dxil-spirv-c-static PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv>) target_link_libraries(dxil-spirv-c-static PRIVATE dxil-debug dxil-converter external::llvm dxil-utils) target_compile_options(dxil-spirv-c-static PRIVATE ${DXIL_SPV_CXX_FLAGS}) set_target_properties(dxil-spirv-c-static PROPERTIES PUBLIC_HEADER dxil_spirv_c.h) set_target_properties(dxil-spirv-c-static PROPERTIES POSITION_INDEPENDENT_CODE ON) if (DXIL_SPIRV_CLI) add_library(cli-parser STATIC third_party/cli_parser/cli_parser.hpp third_party/cli_parser/cli_parser.cpp) target_include_directories(cli-parser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/cli_parser) target_link_libraries(cli-parser PUBLIC dxil-debug) target_compile_options(cli-parser PRIVATE ${DXIL_SPV_CXX_FLAGS}) add_executable(dxil-spirv dxil_spirv.cpp) add_executable(dxil-extract dxil_extract.cpp) target_link_libraries(dxil-spirv PRIVATE dxil-spirv-c-shared cli-parser SPIRV-Tools-static spirv-cross-c dxil-debug) target_compile_options(dxil-spirv PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_link_libraries(dxil-extract PRIVATE dxil-spirv-c-shared cli-parser external::llvm) target_compile_options(dxil-extract PRIVATE ${DXIL_SPV_CXX_FLAGS}) if (WIN32 AND CMAKE_COMPILER_IS_GNUCXX) target_link_libraries(dxil-spirv PRIVATE -static gcc stdc++ winpthread) target_link_libraries(dxil-extract PRIVATE -static gcc stdc++ winpthread) endif() if (NOT DXIL_SPIRV_NATIVE_LLVM) add_executable(dxbc-spirv-sandbox dxbc_spirv_sandbox.cpp) target_link_libraries(dxbc-spirv-sandbox PRIVATE dxil-utils dxil-debug llvm-bc dxil-converter dxbc-spirv dxbc-spirv-test spirv-cross-c SPIRV-Tools-static) endif() endif() set(DXIL_SPV_VERSION_MAJOR 2) set(DXIL_SPV_VERSION_MINOR 66) set(DXIL_SPV_VERSION_PATCH 0) set(DXIL_SPV_VERSION ${DXIL_SPV_VERSION_MAJOR}.${DXIL_SPV_VERSION_MINOR}.${DXIL_SPV_VERSION_PATCH}) set_target_properties(dxil-spirv-c-shared PROPERTIES VERSION ${DXIL_SPV_VERSION} SOVERSION ${DXIL_SPV_VERSION_MAJOR}) set(DXIL_SPV_INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) set(DXIL_SPV_INSTALL_INC_DIR
${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/pkg-config/dxil-spirv-c-shared.pc.in ${CMAKE_CURRENT_BINARY_DIR}/dxil-spirv-c-shared.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dxil-spirv-c-shared.pc DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig) if (DXIL_SPIRV_CLI) install(TARGETS dxil-spirv RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) install(TARGETS dxil-extract RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/dxil_spirv_c.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) install(TARGETS dxil-spirv-c-shared EXPORT dxil_spirv_c_sharedConfig RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) install(EXPORT dxil_spirv_c_sharedConfig DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/dxil_spirv_c_shared/cmake) option(DXIL_SPV_MISC_CLI "Enable misc CLI apps." OFF) if (DXIL_SPV_MISC_CLI) add_executable(structurize-test misc/structurize_test.cpp) target_link_libraries(structurize-test PRIVATE dxil-converter SPIRV-Tools-static spirv-cross-c dxil-debug dxil-utils) target_compile_options(structurize-test PRIVATE ${DXIL_SPV_CXX_FLAGS}) endif() ================================================ FILE: DESCRIPTORS.md ================================================ # Full SM 6.0+ descriptor compatibility in Vulkan In this document, I aim to rethink how we implement descriptors. The goal is to implement descriptors efficiently even in bindless scenarios, i.e. SM 5.1/6.0+. ## Descriptor heaps in D3D12 The API lets you allocate a descriptor heap with N elements. Each element can be an SRV, UAV or CBV, i.e. any arbitrary type. The API exposes some kind of "stride" here, which implies that max(sizeof(SRV), sizeof(UAV), sizeof(CBV)) is some fixed value, e.g. 32 bytes on the drivers I've tested. ## Root signatures Here we can specify up to 64 DWORDs (256 bytes) which get passed to the shader. - Descriptor table pointer: 1 DWORD, (not two? interesting ...) - Root descriptor (UAV/CBV), apparently not bounds checked? (2 DWORDs) - Root constants (1 DWORD each) The first thought that comes to mind is that a descriptor table pointer could correlate to a descriptor set, but this will not work. We only have 8 descriptor sets available. (4 is technically min-spec, but only some mobile chips expose that. I think it's fair to rely on 8.) ### Descriptor table pointer as an offset A weird design choice in D3D12 is SetDescriptorHeaps, which lets you bind only two heaps, one SRV/CBV/UAV heap and one SAMPLER heap, and all descriptor table pointers must refer to one of these. Given that a descriptor table pointer only takes 1 DWORD, this starts making sense. We should just encode offsets into the two heaps here. SetDescriptorHeaps now becomes vkCmdBindDescriptorSets directly. The root signature becomes push constants (on implementations with 256 bytes of push constant space), or spills into a versioned uniform buffer (on 128 byte implementations with a root signature larger than 128 bytes, which should be rare). ### Descriptor types While descriptor types are mostly irrelevant inside D3D12 heaps, we have very particular types in Vulkan: `SAMPLED_IMAGE`, `UNIFORM_TEXEL_BUFFER`, `STORAGE_TEXEL_BUFFER`, `UNIFORM_BUFFER` and friends. Ideally, we'd have a "GENERAL" descriptor type which could be anything, and we'd save on a lot of bloat in this scenario.
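Before moving on, here is a minimal C++ sketch of the root parameter packing described in the previous section; `PackedRootParameters` and its members are hypothetical names for illustration, not the actual dxil-spirv or vkd3d implementation.

```
#include <cstdint>
#include <cstring>

// Hypothetical packed root parameter block mirroring the layout described above:
// a descriptor table pointer shrinks to a single DWORD (an offset into the bound
// SRV/CBV/UAV or SAMPLER heap), root constants take 1 DWORD each, and root
// descriptors keep a full 64-bit GPU VA (2 DWORDs).
struct PackedRootParameters
{
    uint32_t dwords[64] = {}; // D3D12 caps the root signature at 64 DWORDs (256 bytes).

    void set_descriptor_table(uint32_t dword_offset, uint32_t heap_offset)
    {
        dwords[dword_offset] = heap_offset; // 1 DWORD, an offset rather than a GPU VA.
    }

    void set_root_constant(uint32_t dword_offset, uint32_t value)
    {
        dwords[dword_offset] = value; // 1 DWORD each.
    }

    void set_root_descriptor(uint32_t dword_offset, uint64_t gpu_va)
    {
        // 2 DWORDs; root descriptors keep the raw VA since they are not bounds checked.
        std::memcpy(&dwords[dword_offset], &gpu_va, sizeof(gpu_va));
    }
};
```

If the packed block fits within the push constant budget, it maps directly to vkCmdPushConstants; otherwise it spills into the versioned uniform buffer mentioned above.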
With a descriptor heap, we do not know the root signature yet, so we have two choices for how to allocate the descriptors: with `VARIABLE_COUNT` or without. With `VARIABLE_COUNT` we can declare descriptor set layouts which have the maximum number of bindings we expect to support (at least 1M according to Tier 2), and then, when allocating the descriptor pools, allocate just the right number of descriptors. This seems like the correct approach. Two effects follow from this: each descriptor type must live in its own descriptor set, since only one descriptor binding per set can have a variable count, and we thus end up with 6 descriptor set layouts which will be common across all pipelines. Each set contains one binding with a variable-sized array of that type. - Set0: `SAMPLED_IMAGE` - Texture - Set1: `UNIFORM_TEXEL_BUFFER` - TypedBuffer, StructuredBuffer, ByteAddressBuffer - Set2: `STORAGE_TEXEL_BUFFER` - RWTypedBuffer, RWStructuredBuffer, RWByteAddressBuffer. Descriptors come in pairs; the odd indices deal with UAV counters. - Set3: `STORAGE_IMAGE` - Set4: `UNIFORM_BUFFER` - Set5: `SAMPLER` This leaves three sets which can be derived from a root signature directly. These include: - Set6: Immutable samplers. It's useful to keep these in their own set since we don't have to deal with the push descriptor restriction of having to push immutable samplers (and thus having to keep track of them as well). - Set4 (reuse `UNIFORM_BUFFER` set): Virtualized bindings. These are bindings we have to repack from descriptor heaps to support implementations with few CBVs (like older Nvidia cards). Due to RS 1.0 volatile descriptor behavior (the descriptor only needs to be valid on the GPU timeline and can change at any time) we need to defer the actual vkUpdateDescriptorSets calls to QueueSubmit() time. Using a descriptor update template here would be nice! Virtualized bindings are currently the *only* path used by vkd3d, and that path crumbles for any interesting use of SM 5.1 and up, i.e. larger arrays of resources. If we use virtualized descriptors, we can pilfer the set used for uniform buffers, as that's the descriptor type we're going to virtualize anyway. - Set7 (reuse `UNIFORM_BUFFER` set): Versioned push descriptor set. Here we can place: - Root constants which spill outside maxPushConstantsSize - Root descriptors For implementations which don't support push descriptors, we can fall back to a versioned descriptor set instead, just like vkd3d does. #### Why not `STORAGE_BUFFER` for buffer UAVs? Alignment is a big issue for SSBOs, especially on Nvidia. StructuredBuffers can be bound at very awkward alignments, and only a `STORAGE_TEXEL_BUFFER` of R32UI can express those. This might lead us into an awkward path when dealing with 16-bit load/store in SM 6.2. Using physical storage buffers (PSB) for untyped buffers would be great, but we need to consider out-of-bounds behavior, which PSB does not support. Also, if we go the PSB route, we will have another indirection to consider, since rather than: - Load UAV descriptor - Load/Store data we end up with: - Load CBV/UAV descriptor - Load PSB pointer - Load/store data ## Sample shader ``` layout(push_constant) uniform RootConstants { uint descriptor_table_offset0; uint descriptor_table_offset1; uint descriptor_table_offset2; uint descriptor_table_offset3; uint root_constant0; uint root_constant1; uint root_constant2; uint root_constant3; } root; // We can alias descriptors.
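// Aliasing is legal in Vulkan: several declarations may share one (set, binding)
// pair as long as each access goes through the declaration whose type matches the
// descriptor actually written to that heap slot.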
layout(set = 0, binding = 0) uniform texture2D Tex2D[]; layout(set = 0, binding = 0) uniform texture3D Tex3D[]; layout(set = 1, binding = 0) uniform textureBuffer TypedBuffers[]; layout(set = 1, binding = 0) uniform textureBuffer StructuredBuffers[]; layout(set = 1, binding = 0) uniform textureBuffer ByteAddressBuffers[]; layout(set = 2, binding = 0) uniform imageBuffer RWTypedBuffers[]; layout(set = 2, binding = 1, r32ui) uniform uimageBuffer RWStructuredBuffers[]; layout(set = 2, binding = 2, r32ui) uniform uimageBuffer RWByteAddressBuffers[]; layout(set = 3, binding = 0) uniform image2D RWTex2D[]; layout(set = 3, binding = 0) uniform image3D RWTex3D[]; #if SUPPORTS_MANY_CBVS layout(set = 4, binding = 0, std140) uniform UBOs { vec4 data[MAX_SIZE]; } CBV[]; #else // Versioned descriptors. layout(set = 4, binding = 0, std140) uniform UBO0 { vec4 data[MAX_SIZE]; } ubo0; layout(set = 4, binding = 1, std140) uniform UBO1 { vec4 data[MAX_SIZE]; } ubo1; #endif layout(set = 5, binding = 0) uniform sampler DynamicSamplers[]; layout(set = 6, binding = 0) uniform sampler ImmutableSampler0; layout(set = 6, binding = 1) uniform sampler ImmutableSampler1; layout(set = 6, binding = 2) uniform sampler ImmutableSampler2; // Root descriptors. layout(set = 7, binding = 0) uniform RootCBV0 { vec4 data[MAX_SIZE]; } root_cbv0; layout(set = 7, binding = 1) uniform RootCBV1 { vec4 data[MAX_SIZE]; } root_cbv1; void main() { const uint OffsetIntoRootTable = 42; // This is deduced from D3D12_DESCRIPTOR_RANGE. // descriptor_table_offset is an offset into the heap bound by SetDescriptorHeaps, which we can find by comparing // SetGraphicsRootDescriptorTable against SetDescriptorHeaps. // Tack on nonuniformEXT as required by the IL. texelFetch(Tex2D[OffsetIntoRootTable + root.descriptor_table_offset1], ivec2(0), 0); } ``` ================================================ FILE: LICENSE.MIT ================================================ Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation SPDX-License-Identifier: MIT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For third_party/bc-decoder: /****************************************************************************** * The MIT License (MIT) * * Copyright (c) 2019-2020 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. ******************************************************************************/ For third_party/glslang-spirv (glslang): -------------------------------------------------------------------------------- The MIT License -------------------------------------------------------------------------------- Copyright 2020 The Khronos Group Inc Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # dxil-spirv This project provides translation of DXIL (SM 6.x) shaders to SPIR-V for use in the vkd3d project, which implements D3D12 on top of Vulkan. Using [dxbc-spirv](https://github.com/doitsujin/dxbc-spirv) it also handles legacy DXBC shaders. ## Building ### Dependencies Check out submodules first with `git submodule update --init --recursive`. No external dependencies apart from the submodules are required to build. This project implements a "small" LLVM C++ API subset which acts as a drop-in replacement for the full LLVM. It is possible to build against the true LLVM C++ API if LLVM is checked out in `external/llvm` and the `-DDXIL_SPIRV_NATIVE_LLVM=ON` CMake option is used. See the `checkout_llvm.sh` script. ### Build Standard CMake build. ```shell mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=Release cmake --build .
### Build

Standard CMake build.

```shell
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . --config Release
```

## Linking against dxil-spirv

Only the C API is installed, and it is expected to be kept ABI/API stable once it is released.

### pkg-config

```shell
pkg-config dxil-spirv-c-shared --cflags --libs
```

### CMake module

Something like:

```
find_package(dxil_spirv_c_shared)
if (dxil_spirv_c_shared_FOUND)
    message("Found dxil-spirv! Enabling DXIL support.")
    target_link_libraries(vkd3d-shader PRIVATE dxil-spirv-c-shared)
    target_compile_definitions(vkd3d-shader PRIVATE HAVE_DXIL_SPV)
    target_sources(vkd3d-shader PRIVATE vkd3d/libs/vkd3d-shader/dxil.c)
else()
    message("Did not find dxil-spirv :( Disabling DXIL support.")
endif()
```

## Testing

The primary method of testing dxil-spirv and avoiding regressions is a reference shader suite.

### Build DXC

First, build DXC. To keep output consistent, we must use a fixed version of DXC.
Currently this only works on Linux; the Windows build of DXC does not seem to support CMake properly.

```shell
./checkout_dxc.sh
./build_dxc.sh
```

The test suite accepts an arbitrary path to DXC, so if you have a standalone binary somewhere, that can work as well.

### Run test suite

When adding new tests, place the HLSL test in `shaders/` somewhere and run:

```shell
./test_shaders.py shaders --dxc external/dxc-build/bin/dxc --dxil-spirv cmake-build-debug/dxil-spirv
```

If there is any mismatch, the test script will complain.
If there are legitimate changes to be made, add `--update` to the command.
The updated files should then be committed alongside the dxil-spirv change.
`--parallel` can (and should) be used to speed up the process.

To update DXBC references, run:

```shell
./cmake-build-debug/dxbc-spirv-sandbox ./reference-dxbc
```

### Running large repro suites

For internal development, we also have extensive repro suites which cover real-world content.
These cannot be made public for obvious reasons, so the intent is that symlinks are set up during development.
Shaders can be dumped with `VKD3D_SHADER_DUMP_PATH`.

```shell
# The scripts might not work properly if the paths aren't laid out like this.
ln -s ${DXIL_SPIRV_REPO}/shaders shaders-dxil
ln -s ${DXBC_SPIRV_REPO}/shaders shaders-dxbc
ln -s ${DXIL_SPIRV_REPO}/reference/shaders reference/shaders-dxil
ln -s ${DXBC_SPIRV_REPO}/reference/shaders reference/shaders-dxbc
./test_shaders.py shaders-dxil --dxil-spirv cmake-build-release/dxil-spirv --parallel --update
./test_shaders.py shaders-dxbc --dxil-spirv cmake-build-release/dxil-spirv --parallel --update
```

To import shaders into the suite:

```shell
mkdir shaders-dxil/dxilgame
mkdir shaders-dxbc/dxbcgame
# For DXIL
./copy_reference_shaders.py --dxil /tmp/path/to/vkd3d-shader-dump-path --raw --output shaders-dxil/dxilgame
# For DXBC
./copy_reference_shaders.py --dxbc /tmp/path/to/vkd3d-shader-dump-path --raw --output shaders-dxbc/dxbcgame
```

To run only an isolated subfolder, use `--subfolder`.

## License

dxil-spirv is currently licensed as MIT. See LICENSE.MIT for more details.
```c
/* Copyright (c) 2019-2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
```

================================================
FILE: bc/CMakeLists.txt
================================================

add_library(llvm-bc STATIC
    cast.hpp
    iterator.hpp
    data_structures.hpp
    value.hpp value.cpp
    instruction.hpp instruction.cpp
    function.hpp function.cpp
    context.hpp context.cpp
    type.hpp type.cpp
    module.hpp module.cpp
    module_dxbc_ir.cpp
    metadata.hpp metadata.cpp
    disassembler.cpp)
target_compile_options(llvm-bc PRIVATE ${DXIL_SPV_CXX_FLAGS})
target_compile_definitions(llvm-bc PUBLIC HAVE_LLVMBC)
target_link_libraries(llvm-bc PRIVATE bc-decoder dxil-debug dxil-utils)
target_include_directories(llvm-bc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(llvm-bc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(llvm-bc PRIVATE dxbc-spirv)
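# Illustrative note (not part of the original file): since HAVE_LLVMBC and the
# include directory are declared PUBLIC above, a consumer target inherits both
# automatically. A hypothetical tool would only need:
#
#   target_link_libraries(my-tool PRIVATE llvm-bc)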
================================================
FILE: bc/cast.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "logging.hpp"
#include "metadata.hpp"
#include "type.hpp"
#include "value.hpp"
#include <exception> // for std::terminate

namespace LLVMBC
{
struct ModuleParseContext;

template <typename T>
inline T *cast(Type *type)
{
    if (type->getTypeID() != T::get_type_id())
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
    return static_cast<T *>(type);
}

template <typename T>
inline const T *cast(const Type *type)
{
    if (type->getTypeID() != T::get_type_id())
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
    return static_cast<const T *>(type);
}

template <typename T>
inline T *dyn_cast(Type *type)
{
    if (!type)
        return nullptr;
    if (type->getTypeID() != T::get_type_id())
        return nullptr;
    else
        return static_cast<T *>(type);
}

template <typename T>
inline const T *dyn_cast(const Type *type)
{
    if (!type)
        return nullptr;
    if (type->getTypeID() != T::get_type_id())
        return nullptr;
    else
        return static_cast<const T *>(type);
}

template <typename T>
inline bool isa(const Type *type)
{
    return type->getTypeID() == T::get_type_id();
}

class ValueProxy : public Value
{
public:
    static constexpr ValueKind get_value_kind()
    {
        return ValueKind::Proxy;
    }

    ValueProxy(Type *type, ModuleParseContext &context, uint64_t id);

    Value *get_proxy_value() const;
    bool resolve();

    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    uint64_t id;
    ModuleParseContext &context;
    Value *proxy = nullptr;
};

namespace Internal
{
inline Value *resolve_proxy(Value *value);
inline const Value *resolve_proxy(const Value *value);
} // namespace Internal

template <typename T>
inline T *cast(Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<T *>(value);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<const T *>(value);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *dyn_cast(Value *value)
{
    if (!value)
        return nullptr;
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<T *>(value);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const Value *value)
{
    if (!value)
        return nullptr;
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<const T *>(value);
    else
        return nullptr;
}

template <typename T>
inline bool isa(const Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    return T::is_base_of_value_kind(value->get_value_kind());
}

namespace Internal
{
inline Value *resolve_proxy(Value *value)
{
    while (value && value->get_value_kind() == ValueKind::Proxy)
        value = cast<ValueProxy>(value)->get_proxy_value();
    return value;
}

inline const Value *resolve_proxy(const Value *value)
{
    while (value && value->get_value_kind() == ValueKind::Proxy)
        value = cast<ValueProxy>(value)->get_proxy_value();
    return value;
}
} // namespace Internal
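// Usage sketch (illustrative, not part of the upstream header): these helpers
// mirror LLVM's casting idioms. Given some Type *t:
//
//     if (isa<IntegerType>(t))
//         unsigned bits = cast<IntegerType>(t)->getBitWidth();
//     if (auto *ptr_type = dyn_cast<PointerType>(t))
//         t = ptr_type->getElementType();
//
// cast<T> terminates on a kind mismatch, while dyn_cast<T> returns nullptr.
// For Value, both first look through ValueProxy forward references via
// Internal::resolve_proxy(), unless T is ValueProxy itself.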
template <typename T>
inline T *cast(MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(&md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *cast(MDOperand *md)
{
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(&md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const MDOperand *md)
{
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *dyn_cast(MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(&md);
    else
        return nullptr;
}

template <typename T>
inline T *dyn_cast(MDOperand *md)
{
    if (!md)
        return nullptr;
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(md);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(&md);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const MDOperand *md)
{
    if (!md)
        return nullptr;
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(md);
    else
        return nullptr;
}

template <typename T>
inline bool isa(const MDOperand &md)
{
    return md.get_metadata_kind() == T::get_metadata_kind();
}

template <typename T>
inline bool isa(const MDOperand *md)
{
    return md->get_metadata_kind() == T::get_metadata_kind();
}
} // namespace LLVMBC
================================================
FILE: bc/context.cpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "context.hpp"
#include <stdint.h> // for uintptr_t

namespace LLVMBC
{
LLVMContext::LLVMContext()
{
}

LLVMContext::~LLVMContext()
{
    for (size_t i = typed_allocations.size(); i; i--)
        typed_allocations[i - 1]->run();
    for (size_t i = raw_allocations.size(); i; i--)
        dxil_spv::free_in_thread(raw_allocations[i - 1]);
}

void *LLVMContext::allocate_from_chain(uintptr_t size, uintptr_t align)
{
    current_block = (current_block + align - 1) & ~(align - 1);
    if (current_block + size <= current_block_end)
    {
        void *ret = reinterpret_cast<void *>(current_block);
        current_block += size;
        return ret;
    }
    else
    {
        current_block = 0;
        current_block_end = 0;
        return nullptr;
    }
}

void LLVMContext::allocate_new_chain(size_t size, size_t align)
{
    size_t min_size = size + align;
    if (min_size < 64 * 1024)
        min_size = 64 * 1024;

    void *ptr = dxil_spv::allocate_in_thread(min_size);
    if (ptr)
    {
        raw_allocations.push_back(ptr);
        current_block = reinterpret_cast<uintptr_t>(ptr);
        current_block_end = current_block + min_size;
    }
    else
    {
        current_block = 0;
        current_block_end = 0;
    }
}

void *LLVMContext::allocate(size_t size, size_t align)
{
    void *ptr = allocate_from_chain(size, align);
    if (!ptr)
    {
        allocate_new_chain(size, align);
        ptr = allocate_from_chain(size, align);
    }
    return ptr;
}
} // namespace LLVMBC
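// Illustrative note (not part of the upstream file): allocate() implements a
// simple bump allocator over chained blocks. allocate_from_chain() first
// aligns the cursor up:
//
//     current_block = (current_block + align - 1) & ~(align - 1);
//     // e.g. a cursor at 0x1003 with align 8 becomes 0x1008
//
// and only returns memory if `size` bytes still fit before current_block_end.
// Otherwise allocate() falls back to allocate_new_chain(), which requests at
// least max(size + align, 64 * 1024) bytes so the retry cannot fail for
// alignment reasons. Individual allocations are never freed; every block is
// released at once in ~LLVMContext(), after running registered destructors.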
================================================
FILE: bc/context.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "data_structures.hpp"
#include <exception>
#include <new>
#include <type_traits>
#include <utility>

namespace LLVMBC
{
class Type;

class LLVMContext
{
public:
    LLVMContext();
    ~LLVMContext();
    void operator=(const LLVMContext &) = delete;
    LLVMContext(const LLVMContext &) = delete;

    template <typename T, typename... U>
    T *construct(U &&... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T), alignof(T)));
        if (!mem)
            std::terminate();
        T *t = new (mem) T(std::forward<U>(u)...);
        if (!std::is_trivially_destructible<T>::value)
            append_typed_destructor(t);
        return t;
    }

    template <typename T, typename... U>
    T *construct_n(size_t n, const U &... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T) * n, alignof(T)));
        if (!mem)
            std::terminate();
        for (size_t i = 0; i < n; i++)
        {
            T *tmp = new (&mem[i]) T(u...);
            if (!std::is_trivially_destructible<T>::value)
                append_typed_destructor(tmp);
        }
        return mem;
    }

    Vector<Type *> &get_type_cache()
    {
        return type_cache;
    }

private:
    void *allocate(size_t size, size_t align);

    struct Deleter
    {
        virtual ~Deleter() = default;
        virtual void run() = 0;
    };

    template <typename T>
    struct TypedDeleter : Deleter
    {
        explicit TypedDeleter(T *ptr_)
            : ptr(ptr_)
        {
        }

        void run() override
        {
            ptr->~T();
        }
        T *ptr;
    };

    uintptr_t current_block = 0;
    uintptr_t current_block_end = 0;
    void *allocate_from_chain(uintptr_t size, uintptr_t align);
    void allocate_new_chain(size_t size, size_t align);
    Vector<void *> raw_allocations;
    Vector<Deleter *> typed_allocations;
    Vector<Type *> type_cache;

    template <typename T, typename... U>
    T *construct_trivial(U &&... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T), alignof(T)));
        if (!mem)
            std::terminate();
        T *t = new (mem) T(std::forward<U>(u)...);
        return t;
    }

    template <typename T>
    void append_typed_destructor(T *ptr)
    {
        typed_allocations.push_back(construct_trivial<TypedDeleter<T>>(ptr));
    }
};
} // namespace LLVMBC

================================================
FILE: bc/data_structures.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"

namespace LLVMBC
{
template <typename T>
using Vector = dxil_spv::Vector<T>;
template <typename T>
using UnorderedSet = dxil_spv::UnorderedSet<T>;
template <typename K, typename V>
using UnorderedMap = dxil_spv::UnorderedMap<K, V>;
using String = dxil_spv::String;
using StringStream = dxil_spv::StringStream;
}

================================================
FILE: bc/disassembler.cpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cast.hpp" #include "context.hpp" #include "function.hpp" #include "instruction.hpp" #include "metadata.hpp" #include "module.hpp" #include "type.hpp" #include "value.hpp" #include #include #include namespace LLVMBC { struct StreamState { StringStream stream; unsigned indent = 0; void append(Type *type); void append(IntegerType *type); void append(PointerType *type); void append(ArrayType *type); void append(StructType *type); void append(FunctionType *type); void append(VectorType *type); void append(const String &str); void append(Value *value, bool decl = false); void append(GlobalVariable *value, bool decl = false); void append(Instruction *value); void append(Argument *value, bool decl = false); void append(ShuffleVectorInst *shuf, bool decl = false); void append(ExtractElementInst *extr, bool decl = false); void append(InsertElementInst *inst, bool decl = false); void append(Function *value, bool decl = false); void append(BinaryOperator *value, bool decl = false); void append(UnaryOperator *uop, bool decl = false); void append(CallInst *value, bool decl = false); void append(BranchInst *value, bool decl = false); void append(SwitchInst *branch, bool decl = false); void append(ReturnInst *value, bool decl = false); void append(UndefValue *value, bool decl = false); void append(Constant *value, bool decl = false); void append(ConstantInt *value, bool decl = false); void append(ConstantFP *value, bool decl = false); void append(BasicBlock *bb, bool decl = false); void append(FCmpInst *value, bool decl = false); void append(ICmpInst *value, bool decl = false); void append(PHINode *value, bool decl = false); void append(CastInst *value, bool decl = false); void append(SelectInst *value, bool decl = false); void append(ExtractValueInst *value, bool decl = false); void append(AllocaInst *value, bool decl = false); void append(GetElementPtrInst *value, bool decl = false); void append(LoadInst *value, bool decl = false); void append(StoreInst *value, bool decl = false); void append(AtomicRMWInst *value, bool decl = false); void append(AtomicCmpXchgInst *xchg, bool decl = false); void append(ConstantAggregate *agg, bool decl = false); void append(ConstantAggregateZero *zero, bool decl = false); void append(ConstantDataArray *data, bool decl = false); void append(ConstantDataVector *vec, bool decl = false); void append(ConstantExpr *expr, bool decl = false); void append(MDOperand *md); void append(NamedMDNode *md); void append(MDNode *md, bool decl = false); void append(float v); void append(double v); void append(bool v); void append(const char *str); void newline(); void newline_noindent(); void begin_scope(); void end_scope(); template void append(T &&t, Ts &&... ts) { append(std::forward(t)); append(std::forward(ts)...); } // Only want this overload to trigger on various integer types. template typename std::enable_if::value, void>::type append(T value) { stream << value; } // Need this to avoid the generic template to be deduced. 
template void append(char (&str)[N]) { return append(static_cast(str)); } }; void StreamState::append(IntegerType *type) { append("i", type->getBitWidth()); } void StreamState::append(StructType *type) { append("{ "); for (unsigned i = 0; i < type->getNumElements(); i++) { append(type->getElementType(i)); if (i + 1 < type->getNumElements()) append(", "); } append(" }"); } void StreamState::append(PointerType *type) { if (type->getAddressSpace() != 0) append(type->getElementType(), " addrspace(", type->getAddressSpace(), ")*"); else append(type->getElementType(), "*"); } void StreamState::append(ArrayType *type) { append("[", type->getArrayNumElements(), " x ", type->getArrayElementType(), "]"); } void StreamState::append(FunctionType *type) { append("(", type->getReturnType(), " (*) ("); for (unsigned i = 0; i < type->getNumParams(); i++) { append(type->getParamType(i)); if (i + 1 < type->getNumParams()) append(", "); } append("))"); } void StreamState::append(VectorType *type) { append(type->getElementType(), "x", type->getVectorSize()); } void StreamState::append(bool v) { stream << (v ? "true" : "false"); } void StreamState::append(float v) { char buf[1024]; sprintf(buf, "%e", v); append(buf); } void StreamState::append(double v) { char buf[1024]; sprintf(buf, "%e", v); append(buf); } void StreamState::newline() { stream << "\n"; for (unsigned i = 0; i < indent; i++) stream << " "; } void StreamState::newline_noindent() { stream << "\n"; } void StreamState::append(const char *str) { stream << str; } void StreamState::append(const String &str) { stream << str; } void StreamState::begin_scope() { append(" {"); indent++; } void StreamState::end_scope() { assert(indent > 0); indent--; newline(); append("}"); } void StreamState::append(Type *type) { switch (type->getTypeID()) { case Type::TypeID::IntegerTyID: return append(cast(type)); case Type::TypeID::PointerTyID: return append(cast(type)); case Type::TypeID::StructTyID: return append(cast(type)); case Type::TypeID::ArrayTyID: return append(cast(type)); case Type::TypeID::FunctionTyID: return append(cast(type)); case Type::TypeID::VectorTyID: return append(cast(type)); case Type::TypeID::HalfTyID: return append("half"); case Type::TypeID::FloatTyID: return append("float"); case Type::TypeID::DoubleTyID: return append("double"); case Type::TypeID::Unknown: return append("unknown"); case Type::TypeID::VoidTyID: return append("void"); case Type::TypeID::OpaqueTyID: return append("opaque"); default: break; } LOGE("Unknown Type %u.\n", unsigned(type->getTypeID())); } void StreamState::append(ShuffleVectorInst *shuf, bool decl) { if (decl) { append("%", shuf->get_tween_id(), " = shufflevector ", shuf->getType(), " ", shuf->getOperand(0), ", ", shuf->getOperand(1), " <"); auto *vec_type = cast(shuf->getType()); for (unsigned i = 0; i < vec_type->getVectorSize(); i++) { append(shuf->getMaskValue(i)); if (i + 1 < vec_type->getVectorSize()) append(", "); } append(">"); } else append("%", shuf->get_tween_id()); } void StreamState::append(ExtractElementInst *extr, bool decl) { if (decl) { append("%", extr->get_tween_id(), " = extractelement ", extr->getType(), " ", extr->getVectorOperand(), ", ", extr->getIndexOperand()); } else append("%", extr->get_tween_id()); } void StreamState::append(InsertElementInst *inst, bool decl) { if (decl) { append("%", inst->get_tween_id(), " = insertelement ", inst->getOperand(0), ", ", inst->getOperand(1), ", ", inst->getOperand(2)); } else append("%", inst->get_tween_id()); } void StreamState::append(Argument *arg, 
bool decl) { append("%arg", arg->getArgNo()); } void StreamState::append(Function *func, bool decl) { if (decl) { append("define ", func->getType(), " @", func->getName(), "("); auto *type = func->getFunctionType(); for (unsigned i = 0; i < type->getNumParams(); i++) { append(type->getParamType(i)); if (i + 1 < type->getNumParams()) append(", "); } append(")"); if (func->begin() != func->end()) { begin_scope(); for (auto &bb : *func) append(&bb, true); end_scope(); } } else append("@", func->getName()); } void StreamState::append(GlobalVariable *var, bool decl) { if (decl) { append("@", var->get_tween_id(), " = "); if (cast(var->getType())->getAddressSpace() != 0) append("groupshared "); else append(var->isConstant() ? "constant" : "global", " "); append(var->getType()->getPointerElementType()); if (var->hasInitializer()) append(" ", var->getInitializer()); newline(); } else { append("@", var->get_tween_id()); } } static const char *to_string(BinaryOperator::BinaryOps op) { switch (op) { #define BINOP(op, str) \ case BinaryOperator::BinaryOps::op: \ return str BINOP(InvalidBinaryOp, "invalid"); BINOP(Add, "add"); BINOP(FAdd, "fadd"); BINOP(Sub, "sub"); BINOP(FSub, "fsub"); BINOP(Mul, "mul"); BINOP(FMul, "fmul"); BINOP(UDiv, "udiv"); BINOP(SDiv, "sdiv"); BINOP(FDiv, "fdiv"); BINOP(URem, "urem"); BINOP(SRem, "srem"); BINOP(FRem, "frem"); BINOP(Shl, "shl"); BINOP(LShr, "lshr"); BINOP(AShr, "ashr"); BINOP(And, "and"); BINOP(Or, "or"); BINOP(Xor, "xor"); } #undef BINOP return "???"; } static const char *to_string(UnaryOperator::UnaryOps op) { switch (op) { case UnaryOperator::UnaryOps::FNeg: return "fneg"; default: return "invalid"; } } static const char *to_string(Instruction::Predicate pred) { switch (pred) { #define PRED(op, str) \ case Instruction::FCMP_##op: \ return str PRED(FALSE, "false"); PRED(OEQ, "oeq"); PRED(OGT, "ogt"); PRED(OGE, "oge"); PRED(OLT, "olt"); PRED(OLE, "ole"); PRED(ONE, "one"); PRED(ORD, "ord"); PRED(UNO, "uno"); PRED(UEQ, "ueq"); PRED(UGT, "ugt"); PRED(UGE, "uge"); PRED(ULT, "ult"); PRED(ULE, "ule"); PRED(UNE, "une"); PRED(TRUE, "true"); #undef PRED #define PRED(op, str) \ case Instruction::ICMP_##op: \ return str PRED(EQ, "eq"); PRED(NE, "ne"); PRED(UGT, "ugt"); PRED(UGE, "uge"); PRED(ULT, "ult"); PRED(ULE, "ule"); PRED(SGT, "sgt"); PRED(SGE, "sge"); PRED(SLT, "slt"); PRED(SLE, "sle"); } #undef PRED return "???"; } static const char *to_string(Instruction::CastOps op) { switch (op) { #define CAST(op, str) \ case Instruction::op: \ return str CAST(Trunc, "trunc"); CAST(ZExt, "zext"); CAST(SExt, "sext"); CAST(FPToUI, "fptoui"); CAST(FPToSI, "fptosi"); CAST(UIToFP, "uitofp"); CAST(SIToFP, "sitofp"); CAST(FPTrunc, "fptrunc"); CAST(FPExt, "fpext"); CAST(PtrToInt, "ptrtoint"); CAST(IntToPtr, "inttoptr"); CAST(BitCast, "bitcast"); CAST(AddrSpaceCast, "addrspacecast"); default: break; } #undef CAST return "???"; } static const char *to_string(AtomicRMWInst::BinOp op) { switch (op) { #define RMW(op, str) \ case AtomicRMWInst::BinOp::op: \ return str RMW(Add, "add"); RMW(Sub, "sub"); RMW(Xchg, "xchg"); RMW(And, "and"); RMW(Xor, "xor"); RMW(Or, "or"); RMW(Nand, "nand"); RMW(Max, "max"); RMW(Min, "min"); RMW(UMax, "umax"); RMW(UMin, "umin"); RMW(FAdd, "fadd"); RMW(FSub, "fsub"); default: break; } #undef RMW return "???"; } void StreamState::append(BinaryOperator *binop, bool decl) { if (decl) { append("%", binop->get_tween_id(), " = ", to_string(binop->getOpcode()), " ", binop->getType(), " ", binop->getOperand(0), ", ", binop->getOperand(1)); } else { append("%", 
binop->get_tween_id()); } } void StreamState::append(UnaryOperator *uop, bool decl) { if (decl) { append("%", uop->get_tween_id(), " = ", to_string(uop->getOpcode()), " ", uop->getType(), " ", uop->getOperand(0), ", ", uop->getOperand(1)); } else { append("%", uop->get_tween_id()); } } void StreamState::append(BasicBlock *bb, bool decl) { if (decl) { newline_noindent(); newline_noindent(); append(bb->get_tween_id(), ":"); for (auto &inst : *bb) { newline(); append(&inst); } } else { append("label %", bb->get_tween_id()); } } void StreamState::append(FCmpInst *value, bool decl) { if (decl) { append("%", value->get_tween_id(), " = fcmp ", to_string(value->getPredicate()), " ", value->getOperand(0), ", ", value->getOperand(1)); } else { append("%", value->get_tween_id()); } } void StreamState::append(ICmpInst *value, bool decl) { if (decl) { append("%", value->get_tween_id(), " = icmp ", to_string(value->getPredicate()), " ", value->getOperand(0), ", ", value->getOperand(1)); } else { append("%", value->get_tween_id()); } } void StreamState::append(BranchInst *br, bool) { append("br "); if (br->getCondition()) append(br->getCondition(), ", ", br->getSuccessor(0), ", ", br->getSuccessor(1)); else append(br->getSuccessor(0)); } void StreamState::append(SwitchInst *branch, bool) { append("switch ", branch->getCondition(), ", ", branch->getDefaultDest()); begin_scope(); for (auto itr = branch->case_begin(); itr != branch->case_end(); ++itr) { newline(); append(itr->getCaseValue(), ", ", itr->getCaseSuccessor()); } end_scope(); } void StreamState::append(CallInst *call, bool decl) { if (decl) { if (call->getType()->getTypeID() != Type::TypeID::VoidTyID) append("%", call->get_tween_id(), " = "); append("call ", call->getType(), " @", call->getCalledFunction()->getName(), "("); for (unsigned i = 0; i < call->getNumOperands(); i++) { append(call->getOperand(i)); if (i + 1 < call->getNumOperands()) append(", "); } append(")"); for (auto itr = call->metadata_begin(); itr != call->metadata_end(); ++itr) { append(" !", itr->first, " ", itr->second); } } else { append("%", call->get_tween_id()); } } void StreamState::append(CastInst *cast, bool decl) { if (decl) { append("%", cast->get_tween_id(), " = ", to_string(cast->getOpcode()), " ", cast->getOperand(0), " to ", cast->getType()); } else { append("%", cast->get_tween_id()); } } void StreamState::append(SelectInst *cast, bool decl) { if (decl) { append("%", cast->get_tween_id(), " = ", "select ", cast->getOperand(0), ", ", cast->getOperand(1), ", ", cast->getOperand(2)); } else { append("%", cast->get_tween_id()); } } void StreamState::append(ExtractValueInst *ext, bool decl) { if (decl) { append("%", ext->get_tween_id(), " = ", "extractvalue ", ext->getType(), " ", ext->getAggregateOperand()); for (unsigned i = 0; i < ext->getNumIndices(); i++) { append(", "); append(ext->getIndices()[i]); } } else { append("%", ext->get_tween_id()); } } void StreamState::append(AllocaInst *alloca, bool decl) { if (decl) { append("%", alloca->get_tween_id(), " = alloca ", cast(alloca->getType())->getElementType()); } else { append("%", alloca->get_tween_id()); } } void StreamState::append(GetElementPtrInst *ptr, bool decl) { if (decl) { append("%", ptr->get_tween_id(), " = getelementptr ", ptr->isInBounds() ? 
"inbounds " : "", ptr->getType()); for (unsigned i = 0; i < ptr->getNumOperands(); i++) { append(", "); append(ptr->getOperand(i)); } } else { append("%", ptr->get_tween_id()); } } void StreamState::append(LoadInst *ptr, bool decl) { if (decl) append("%", ptr->get_tween_id(), " = load ", ptr->getType(), " ", ptr->getPointerOperand()); else append("%", ptr->get_tween_id()); } void StreamState::append(StoreInst *ptr, bool decl) { if (decl) append("store ", ptr->getOperand(0), ", ", ptr->getOperand(1)); else append("%", ptr->get_tween_id()); } void StreamState::append(AtomicRMWInst *atomic_op, bool decl) { if (decl) { append("%", atomic_op->get_tween_id(), " = atomicrmw ", to_string(atomic_op->getOperation()), " ", atomic_op->getType(), " ", atomic_op->getPointerOperand(), ", ", atomic_op->getValOperand()); } else append("%", atomic_op->get_tween_id()); } void StreamState::append(AtomicCmpXchgInst *xchg, bool decl) { if (decl) { append("%", xchg->get_tween_id(), " = cmpxchg ", xchg->getType(), " ", xchg->getPointerOperand(), ", ", xchg->getCompareOperand(), ", ", xchg->getNewValOperand()); } else append("%", xchg->get_tween_id()); } void StreamState::append(ConstantAggregate *agg, bool) { append("["); if (agg->getNumOperands()) append(agg->getOperand(0)); for (unsigned i = 1; i < agg->getNumOperands(); i++) append(", ", agg->getOperand(i)); append("]"); } void StreamState::append(ConstantAggregateZero *zero, bool) { append("[zeroinitialized]"); } void StreamState::append(ConstantDataArray *arr, bool) { append("["); for (unsigned i = 0; i < arr->getNumElements(); i++) { append(arr->getElementAsConstant(i)); if (i + 1 < arr->getNumElements()) append(", "); } append("]"); } void StreamState::append(ConstantDataVector *vec, bool) { append("<"); for (unsigned i = 0; i < vec->getNumElements(); i++) { append(vec->getElementAsConstant(i)); if (i + 1 < vec->getNumElements()) append(", "); } append(">"); } void StreamState::append(ConstantExpr *expr, bool decl) { if (decl) { append("%", expr->get_tween_id(), " = ", expr->getOpcode(), " ", expr->getType()); if (expr->getNumOperands()) append(" ", expr->getOperand(0)); for (unsigned i = 1; i < expr->getNumOperands(); i++) append(", ", expr->getOperand(i)); } else { append("%", expr->get_tween_id()); } } void StreamState::append(PHINode *phi, bool decl) { if (decl) { append("%", phi->get_tween_id(), " = phi ", phi->getType(), " "); unsigned count = phi->getNumIncomingValues(); for (unsigned i = 0; i < count; i++) { Value *value = phi->getIncomingValue(i); BasicBlock *bb = phi->getIncomingBlock(i); append("[ ", value, ", ", bb, " ]"); if (i + 1 < count) append(", "); } } else { append("%", phi->get_tween_id()); } } void StreamState::append(ReturnInst *value, bool) { if (value->getReturnValue()) append("ret ", value); else append("ret void"); } void StreamState::append(UndefValue *undef, bool decl) { append(undef->getType(), " undef"); } void StreamState::append(ConstantFP *value, bool decl) { append(value->getValueAPF().convertToDouble()); } void StreamState::append(ConstantInt *value, bool decl) { append(value->getType(), " ", value->getUniqueInteger().getSExtValue()); } void StreamState::append(Constant *value, bool decl) { append(static_cast(value), decl); } void StreamState::append(Instruction *inst) { append(static_cast(inst), true); } void StreamState::append(MDNode *md, bool decl) { if (md) { if (decl) { append("!", md->get_tween_id(), " = !{"); for (unsigned i = 0; i < md->getNumOperands(); i++) { append(&md->getOperand(i)); if (i + 1 < 
md->getNumOperands()) append(", "); } append("}"); } else append("!", md->get_tween_id()); } else append("null"); } void StreamState::append(NamedMDNode *md) { append("!", md->getName(), " = !{"); for (unsigned i = 0; i < md->getNumOperands(); i++) { append(md->getOperand(i), false); if (i + 1 < md->getNumOperands()) append(", "); } append("}"); } void StreamState::append(MDOperand *md) { if (md) { switch (md->get_metadata_kind()) { case MetadataKind::NamedNode: return append(cast(md)); case MetadataKind::Node: return append(cast(md), false); case MetadataKind::Constant: return append(cast(md)->getValue()); case MetadataKind::String: return append("\"", cast(md)->getString(), "\""); case MetadataKind::None: return append("null"); default: LOGE("Unknown MetadataKind %u.\n", unsigned(md->get_metadata_kind())); break; } } else append("null"); } void StreamState::append(Value *value, bool decl) { switch (value->get_value_kind()) { case ValueKind::Argument: return append(cast(value), decl); case ValueKind::Function: return append(cast(value), decl); case ValueKind::BinaryOperator: return append(cast(value), decl); case ValueKind::UnaryOperator: return append(cast(value), decl); case ValueKind::Call: return append(cast(value), decl); case ValueKind::Branch: return append(cast(value), decl); case ValueKind::FCmp: return append(cast(value), decl); case ValueKind::ICmp: return append(cast(value), decl); case ValueKind::Return: return append(cast(value), decl); case ValueKind::Undef: return append(cast(value), decl); case ValueKind::ConstantInt: return append(cast(value), decl); case ValueKind::ConstantFP: return append(cast(value), decl); case ValueKind::BasicBlock: return append(cast(value), decl); case ValueKind::PHI: return append(cast(value), decl); case ValueKind::Cast: return append(cast(value), decl); case ValueKind::Select: return append(cast(value), decl); case ValueKind::ExtractValue: return append(cast(value), decl); case ValueKind::Alloca: return append(cast(value), decl); case ValueKind::GetElementPtr: return append(cast(value), decl); case ValueKind::Load: return append(cast(value), decl); case ValueKind::Store: return append(cast(value), decl); case ValueKind::AtomicRMW: return append(cast(value), decl); case ValueKind::AtomicCmpXchg: return append(cast(value), decl); case ValueKind::Global: return append(cast(value), decl); case ValueKind::ConstantAggregate: return append(cast(value), decl); case ValueKind::ConstantAggregateZero: return append(cast(value), decl); case ValueKind::ConstantDataArray: return append(cast(value), decl); case ValueKind::ConstantDataVector: return append(cast(value), decl); case ValueKind::ConstantExpr: return append(cast(value), decl); case ValueKind::Switch: return append(cast(value), decl); case ValueKind::ShuffleVector: return append(cast(value), decl); case ValueKind::ExtractElement: return append(cast(value), decl); case ValueKind::InsertElement: return append(cast(value), decl); default: break; } LOGE("Unknown ValueKind %u.\n", unsigned(value->get_value_kind())); if (decl) append("%", value->get_tween_id(), " = unimplemented"); else append("%", value->get_tween_id()); } bool disassemble(Module &module, String &str) { StreamState state; for (auto itr = module.global_begin(); itr != module.global_end(); ++itr) state.append(&*itr, true); for (auto *func : module) { state.newline(); state.append(func, true); state.newline(); } state.newline(); for (auto itr = module.named_metadata_begin(); itr != module.named_metadata_end(); ++itr) { state.newline(); 
state.append(itr->second); } state.newline(); for (auto itr = module.unnamed_metadata_begin(); itr != module.unnamed_metadata_end(); ++itr) { state.newline(); state.append(*itr, true); } str = state.stream.str(); return true; } } // namespace LLVMBC ================================================ FILE: bc/function.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "function.hpp" #include "context.hpp" #include "instruction.hpp" #include "module.hpp" #include "type.hpp" #include #include namespace LLVMBC { Function::Function(FunctionType *function_type_, uint64_t value_id_, Module &module_) : Constant(function_type_, ValueKind::Function) , module(module_) , value_id(value_id_) , function_type(function_type_) { } const String &Function::getName() const { return module.get_value_name(value_id); } void Function::set_basic_blocks(Vector basic_blocks_) { basic_blocks = std::move(basic_blocks_); } FunctionType *Function::getFunctionType() const { return function_type; } IteratorAdaptor::const_iterator> Function::begin() const { return basic_blocks.begin(); } IteratorAdaptor::const_iterator> Function::end() const { return basic_blocks.end(); } BasicBlock &Function::getEntryBlock() const { return *basic_blocks.front(); } void Function::add_argument(Argument *arg) { arguments.push_back(arg); } String Attribute::getValueAsString() const { // LLVM implementation does this. 
if (value) return *value; else return {}; } Attribute::Attribute(const String *value_) : value(value_) { } Attribute Function::getFnAttribute(const char *attribute) const { for (auto &attr : attributes) if (attr.first == attribute) return Attribute(&attr.second); return Attribute(nullptr); } bool Function::hasFnAttribute(const char *attribute) const { for (auto &attr : attributes) if (attr.first == attribute) return true; return false; } void Function::set_attributes(Vector> attributes_) { attributes = std::move(attributes_); } void Function::set_structured_control_flow() { structured_control_flow = true; } bool Function::get_structured_control_flow() const { return structured_control_flow; } IteratorAdaptor::const_iterator> Function::arg_begin() const { return arguments.begin(); } IteratorAdaptor::const_iterator> Function::arg_end() const { return arguments.end(); } BasicBlock::BasicBlock(LLVMContext &context_) : Value(Type::getLabelTy(context_), ValueKind::BasicBlock) { } void BasicBlock::add_instruction(Instruction *inst) { instructions.push_back(inst); } Instruction *BasicBlock::getTerminator() const { if (!instructions.empty() && instructions.back()->isTerminator()) return instructions.back(); else return nullptr; } void BasicBlock::add_successor(BasicBlock *succ) { if (std::find(succs.begin(), succs.end(), succ) == succs.end()) succs.push_back(succ); } BasicBlock::Merge BasicBlock::get_merge() const { return merge; } BasicBlock *BasicBlock::get_merge_bb() const { return merge_bb; } BasicBlock *BasicBlock::get_continue_bb() const { return continue_bb; } void BasicBlock::set_selection_merge(BasicBlock *bb) { merge = Merge::Selection; merge_bb = bb; } void BasicBlock::set_loop_merge(BasicBlock *merge_bb_, BasicBlock *continue_bb_) { merge = Merge::Loop; merge_bb = merge_bb_; continue_bb = continue_bb_; } IteratorAdaptor::const_iterator> BasicBlock::begin() const { return instructions.begin(); } IteratorAdaptor::const_iterator> BasicBlock::end() const { return instructions.end(); } Vector::const_iterator BasicBlock::succ_begin() const { return succs.begin(); } Vector::const_iterator BasicBlock::succ_end() const { return succs.end(); } } // namespace LLVMBC ================================================ FILE: bc/function.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "iterator.hpp" #include "value.hpp" namespace LLVMBC { class LLVMContext; class Instruction; class Module; class FunctionType; class BasicBlock : public Value { public: static constexpr ValueKind get_value_kind() { return ValueKind::BasicBlock; } explicit BasicBlock(LLVMContext &context); void add_instruction(Instruction *inst); Instruction *getTerminator() const; IteratorAdaptor::const_iterator> begin() const; IteratorAdaptor::const_iterator> end() const; void add_successor(BasicBlock *succ); enum class Merge { None, Selection, Loop }; Merge get_merge() const; void set_selection_merge(BasicBlock *bb); void set_loop_merge(BasicBlock *merge_bb, BasicBlock *continue_bb); BasicBlock *get_merge_bb() const; BasicBlock *get_continue_bb() const; Vector::const_iterator succ_begin() const; Vector::const_iterator succ_end() const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector instructions; Vector succs; Merge merge = Merge::None; BasicBlock *merge_bb = nullptr; BasicBlock *continue_bb = nullptr; }; inline Vector::const_iterator succ_begin(const BasicBlock *bb) { return bb->succ_begin(); } inline Vector::const_iterator succ_end(const BasicBlock *bb) { return bb->succ_end(); } class Attribute { public: explicit Attribute(const String *value); String getValueAsString() const; private: const String *value; }; class Function : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Function; } explicit Function(FunctionType *function_type, uint64_t value_id, Module &module); const String &getName() const; void set_basic_blocks(Vector basic_blocks); IteratorAdaptor::const_iterator> begin() const; IteratorAdaptor::const_iterator> end() const; FunctionType *getFunctionType() const; BasicBlock &getEntryBlock() const; void add_argument(Argument *arg); IteratorAdaptor::const_iterator> arg_begin() const; IteratorAdaptor::const_iterator> arg_end() const; // Bare bones implementation, we only need it for fp32-denorm-mode attribute. Attribute getFnAttribute(const char *attribute) const; bool hasFnAttribute(const char *attribute) const; void set_attributes(Vector> attributes); bool get_structured_control_flow() const; void set_structured_control_flow(); LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Module &module; uint64_t value_id; FunctionType *function_type; Vector basic_blocks; Vector arguments; Vector> attributes; bool structured_control_flow = false; }; } // namespace LLVMBC ================================================ FILE: bc/instruction.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "instruction.hpp" #include "cast.hpp" #include namespace LLVMBC { Instruction::Instruction(Type *type, ValueKind kind) : Value(type, kind) { } void Instruction::set_operands(Vector op) { operands = std::move(op); } unsigned Instruction::getNumOperands() const { return operands.size(); } Value *Instruction::getOperand(unsigned index) const { if (index >= operands.size()) { LOGE("Operand index is out of range.\n"); return nullptr; } return Internal::resolve_proxy(operands[index]); } bool Instruction::isTerminator() const { return is_terminator; } void Instruction::set_terminator() { is_terminator = true; } bool Instruction::resolve_proxy_values() { for (auto &op : operands) while (op && op->get_value_kind() == ValueKind::Proxy) op = cast(op)->get_proxy_value(); if (get_value_kind() == ValueKind::PHI) { auto *phi = cast(this); if (!phi->resolve_proxy_values_incoming()) return false; } return true; } void Instruction::setMetadata(const String &str, MDNode *node) { attachments[str] = node; } UnorderedMap::const_iterator Instruction::metadata_begin() const { return attachments.begin(); } UnorderedMap::const_iterator Instruction::metadata_end() const { return attachments.end(); } bool Instruction::hasMetadata(const String &str) const { return attachments.find(str) != attachments.end(); } MDNode *Instruction::getMetadata(const String &str) const { auto itr = attachments.find(str); if (itr != attachments.end()) return itr->second; else return nullptr; } bool Instruction::is_base_of_value_kind(ValueKind kind) { switch (kind) { case ValueKind::Return: case ValueKind::Unreachable: case ValueKind::Call: case ValueKind::UnaryOperator: case ValueKind::BinaryOperator: case ValueKind::Cast: case ValueKind::Select: case ValueKind::ExtractValue: case ValueKind::Alloca: case ValueKind::GetElementPtr: case ValueKind::Load: case ValueKind::Store: case ValueKind::CompareBase: case ValueKind::FCmp: case ValueKind::ICmp: case ValueKind::Branch: case ValueKind::Switch: case ValueKind::PHI: case ValueKind::AtomicRMW: case ValueKind::AtomicCmpXchg: case ValueKind::ShuffleVector: case ValueKind::ExtractElement: case ValueKind::InsertElement: return true; default: break; } return false; } BinaryOperator::BinaryOperator(Value *LHS, Value *RHS, BinaryOps op_) : Instruction(LHS->getType(), ValueKind::BinaryOperator) , op(op_) { set_operands({ LHS, RHS }); } BinaryOperator::BinaryOps BinaryOperator::getOpcode() const { return op; } bool BinaryOperator::isFast() const { return fast_math; } void BinaryOperator::setFast(bool enabled) { fast_math = enabled; } UnaryOperator::UnaryOperator(UnaryOps uop, Value *value) : Instruction(value->getType(), ValueKind::UnaryOperator), op(uop) { set_operands({ value }); } UnaryOperator::UnaryOps UnaryOperator::getOpcode() const { return op; } ReturnInst::ReturnInst(Value *value_) : Instruction(value_ ? 
value_->getType() : nullptr, ValueKind::Return) , value(value_) { set_terminator(); } UnreachableInst::UnreachableInst() : Instruction(nullptr, ValueKind::Unreachable) { set_terminator(); } CallInst::CallInst(FunctionType *function_type_, Function *callee_, Vector params) : Instruction(function_type_->getReturnType(), ValueKind::Call) , callee(callee_) { set_operands(std::move(params)); } Function *CallInst::getCalledFunction() const { return callee; } Value *ReturnInst::getReturnValue() const { return Internal::resolve_proxy(value); } CmpInst::CmpInst(ValueKind kind, Predicate pred_, Value *LHS, Value *RHS) : Instruction(Type::getInt1Ty(LHS->getType()->getContext()), kind) , pred(pred_) { set_operands({ LHS, RHS }); } CastInst::CastInst(Type *type, Value *value, Instruction::CastOps op_) : Instruction(type, ValueKind::Cast) , op(op_) { set_operands({ value }); } SelectInst::SelectInst(Value *true_value, Value *false_value, Value *cond) : Instruction(true_value->getType(), ValueKind::Select) { set_operands({ cond, true_value, false_value }); } ExtractValueInst::ExtractValueInst(Type *type, Value *aggregate, Vector indices_) : Instruction(type, ValueKind::ExtractValue) , indices(std::move(indices_)) { set_operands({ aggregate }); } Value *ExtractValueInst::getAggregateOperand() const { return Internal::resolve_proxy(operands[0]); } unsigned ExtractValueInst::getNumIndices() const { return indices.size(); } const unsigned *ExtractValueInst::getIndices() const { return indices.data(); } Instruction::CastOps CastInst::getOpcode() const { return op; } Instruction::Predicate CmpInst::getPredicate() const { return pred; } bool CmpInst::is_base_of_value_kind(ValueKind kind) { return kind == ValueKind::ICmp || kind == ValueKind::FCmp; } FCmpInst::FCmpInst(Predicate pred_, Value *LHS, Value *RHS) : CmpInst(ValueKind::FCmp, pred_, LHS, RHS) { set_operands({ LHS, RHS }); } ICmpInst::ICmpInst(Predicate pred_, Value *LHS, Value *RHS) : CmpInst(ValueKind::ICmp, pred_, LHS, RHS) { set_operands({ LHS, RHS }); } BranchInst::BranchInst(BasicBlock *true_block, BasicBlock *false_block, Value *cond_) : Instruction(nullptr, ValueKind::Branch) , cond(cond_) { set_terminator(); num_blocks = 2; bbs[0] = true_block; bbs[1] = false_block; } BranchInst::BranchInst(BasicBlock *true_block) : Instruction(nullptr, ValueKind::Branch) { set_terminator(); num_blocks = 1; bbs[0] = true_block; } bool BranchInst::isConditional() const { return cond != nullptr; } Value *BranchInst::getCondition() const { return Internal::resolve_proxy(cond); } BasicBlock *BranchInst::getSuccessor(unsigned index) const { assert(index < num_blocks); return bbs[index]; } unsigned BranchInst::getNumSuccessors() const { return num_blocks; } SwitchInst::SwitchInst(Value *cond_, BasicBlock *default_block_, unsigned num_cases) : Instruction(Type::getVoidTy(cond_->getType()->getContext()), ValueKind::Switch) , cond(cond_) , default_block(default_block_) { set_terminator(); cases.reserve(num_cases); } void SwitchInst::addCase(Value *case_value, BasicBlock *bb) { cases.push_back({ case_value, bb }); } Vector::const_iterator SwitchInst::case_begin() const { return cases.begin(); } Vector::const_iterator SwitchInst::case_end() const { return cases.end(); } BasicBlock *SwitchInst::getDefaultDest() const { return default_block; } Value *SwitchInst::getCondition() const { return Internal::resolve_proxy(cond); } ConstantInt *SwitchInst::Case::getCaseValue() const { return cast(value); } BasicBlock *SwitchInst::Case::getCaseSuccessor() const { return bb; } 
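// Illustrative note (not part of the upstream file): while the bitcode is
// being parsed, incoming PHI operands may still be ValueProxy forward
// references. PHINode therefore stores raw (value, basic block) pairs via
// add_incoming() and patches them later, e.g.:
//
//     phi->add_incoming(maybe_proxy_value, pred_bb);  // during parsing
//     phi->resolve_proxy_values_incoming();           // once all values exist
//
// getIncomingValue() additionally resolves proxies on every lookup, so callers
// never observe a ValueProxy.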
PHINode::PHINode(Type *type, size_t num_edges) : Instruction(type, ValueKind::PHI) { incoming.reserve(num_edges); } void PHINode::add_incoming(Value *value, BasicBlock *bb) { incoming.push_back({ value, bb }); } unsigned PHINode::getNumIncomingValues() const { return unsigned(incoming.size()); } AllocaInst::AllocaInst(Type *pointer_type, Type *element_type_, Value *size) : Instruction(pointer_type, ValueKind::Alloca) , array_size(size) { } Value *AllocaInst::getArraySize() const { return Internal::resolve_proxy(array_size); } GetElementPtrInst::GetElementPtrInst(Type *pointer_type, Vector indices, bool inbounds_) : Instruction(pointer_type, ValueKind::GetElementPtr) , inbounds(inbounds_) { set_operands(std::move(indices)); } bool GetElementPtrInst::isInBounds() const { return inbounds; } LoadInst::LoadInst(Type *type, Value *ptr) : Instruction(type, ValueKind::Load) { set_operands({ ptr }); } Value *LoadInst::getPointerOperand() const { return getOperand(0); } StoreInst::StoreInst(Value *ptr, Value *value) : Instruction(Type::getVoidTy(ptr->getType()->getContext()), ValueKind::Store) { set_operands({ value, ptr }); } BasicBlock *PHINode::getIncomingBlock(unsigned index) const { if (index >= incoming.size()) return nullptr; return incoming[index].bb; } Value *PHINode::getIncomingValue(unsigned index) const { if (index >= incoming.size()) return nullptr; return Internal::resolve_proxy(incoming[index].value); } bool PHINode::resolve_proxy_values_incoming() { for (auto &node : incoming) { while (node.value && node.value->get_value_kind() == ValueKind::Proxy) { node.value = cast(node.value)->get_proxy_value(); if (!node.value) return false; } } return true; } AtomicRMWInst::AtomicRMWInst(Type *type, Value *ptr_, Value *value_, BinOp op_) : Instruction(type, ValueKind::AtomicRMW) , ptr(ptr_) , value(value_) , op(op_) { set_operands({ ptr, value }); } Value *AtomicRMWInst::getPointerOperand() const { return Internal::resolve_proxy(ptr); } Value *AtomicRMWInst::getValOperand() const { return Internal::resolve_proxy(value); } AtomicRMWInst::BinOp AtomicRMWInst::getOperation() const { return op; } AtomicCmpXchgInst::AtomicCmpXchgInst(Value *ptr_, Value *cmp_, Value *new_value_, Type *type_override) : Instruction(type_override ? 
AtomicCmpXchgInst::AtomicCmpXchgInst(Value *ptr_, Value *cmp_, Value *new_value_, Type *type_override)
    : Instruction(type_override ?
                      type_override :
                      StructType::get(new_value_->getType()->getContext(),
                                      { new_value_->getType(),
                                        Type::getInt1Ty(new_value_->getType()->getContext()) }),
                  ValueKind::AtomicCmpXchg)
    , ptr(ptr_)
    , new_value(new_value_)
    , cmp_value(cmp_)
{
    set_operands({ ptr, new_value, cmp_value });
}

Value *AtomicCmpXchgInst::getPointerOperand() const { return Internal::resolve_proxy(ptr); }
Value *AtomicCmpXchgInst::getCompareOperand() const { return Internal::resolve_proxy(cmp_value); }
Value *AtomicCmpXchgInst::getNewValOperand() const { return Internal::resolve_proxy(new_value); }
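// As in upstream LLVM, a cmpxchg without a type override yields a { T, i1 }
// aggregate: the original memory value plus a success flag. A consumer would
// typically peel the payload off with ExtractValueInst, e.g. (sketch, where
// value_type and cmpxchg_inst stand in for the caller's objects):
//   auto *loaded = context->construct<ExtractValueInst>(
//       value_type, cmpxchg_inst, Vector<unsigned>{ 0u });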
ShuffleVectorInst::ShuffleVectorInst(Type *type, Value *a, Value *b, Value *shuf)
    : Instruction(type, ValueKind::ShuffleVector)
{
    set_operands({ a, b });
    auto *masks = cast<ConstantDataVector>(shuf);
    shuffle_mask.reserve(masks->getNumElements());
    for (unsigned i = 0; i < masks->getNumElements(); i++)
        shuffle_mask.push_back(cast<ConstantInt>(masks->getElementAsConstant(i))->getUniqueInteger().getSExtValue());
}

int ShuffleVectorInst::getMaskValue(unsigned index) const
{
    assert(index < shuffle_mask.size());
    return shuffle_mask[index];
}

ExtractElementInst::ExtractElementInst(Value *vec_, Value *index_)
    : Instruction(cast<VectorType>(vec_->getType())->getElementType(), ValueKind::ExtractElement)
    , vec(vec_)
    , index(index_)
{
    set_operands({ vec, index });
}

Value *ExtractElementInst::getVectorOperand() const { return Internal::resolve_proxy(vec); }
Value *ExtractElementInst::getIndexOperand() const { return Internal::resolve_proxy(index); }

InsertElementInst::InsertElementInst(Value *vec, Value *value, Value *index)
    : Instruction(vec->getType(), ValueKind::InsertElement)
{
    set_operands({ vec, value, index });
}

CompositeConstructInst::CompositeConstructInst(Type *type, Vector<Value *> constituents)
    : Instruction(type, ValueKind::CompositeConstruct)
{
    set_operands(std::move(constituents));
}
} // namespace LLVMBC

================================================
FILE: bc/instruction.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include "value.hpp"

namespace LLVMBC
{
class FunctionType;
class Function;
class BasicBlock;
class MDNode;

class Instruction : public Value
{
public:
    Instruction(Type *type, ValueKind kind);

    enum Predicate
    {
        FCMP_FALSE = 0, FCMP_OEQ = 1, FCMP_OGT = 2, FCMP_OGE = 3,
        FCMP_OLT = 4, FCMP_OLE = 5, FCMP_ONE = 6, FCMP_ORD = 7,
        FCMP_UNO = 8, FCMP_UEQ = 9, FCMP_UGT = 10, FCMP_UGE = 11,
        FCMP_ULT = 12, FCMP_ULE = 13, FCMP_UNE = 14, FCMP_TRUE = 15,
        ICMP_EQ = 32, ICMP_NE = 33, ICMP_UGT = 34, ICMP_UGE = 35,
        ICMP_ULT = 36, ICMP_ULE = 37, ICMP_SGT = 38, ICMP_SGE = 39,
        ICMP_SLT = 40, ICMP_SLE = 41
    };

    enum CastOps
    {
        InvalidCastOp = 100,
        Trunc, ZExt, SExt, FPToUI, FPToSI, UIToFP, SIToFP,
        FPTrunc, FPExt, PtrToInt, IntToPtr, BitCast, AddrSpaceCast
    };

    enum GEPOps
    {
        GetElementPtr = 200
    };

    enum BinaryOps
    {
        InvalidBinaryOp = 300,
        Add, FAdd, Sub, FSub, Mul, FMul, UDiv, SDiv, FDiv,
        URem, SRem, FRem, Shl, LShr, AShr, And, Or, Xor
    };

    bool isTerminator() const;
    Value *getOperand(unsigned index) const;
    unsigned getNumOperands() const;
    bool resolve_proxy_values();

    MDNode *getMetadata(const String &str) const;
    bool hasMetadata(const String &str) const;
    void setMetadata(const String &str, MDNode *node);
    UnorderedMap<String, MDNode *>::const_iterator metadata_begin() const;
    UnorderedMap<String, MDNode *>::const_iterator metadata_end() const;

    static bool is_base_of_value_kind(ValueKind kind);
    static constexpr ValueKind get_value_kind() { return ValueKind::InstructionBase; }

protected:
    void set_terminator();
    bool is_terminator = false;
    void set_operands(Vector<Value *> op);
    Vector<Value *> operands;
    UnorderedMap<String, MDNode *> attachments;
};
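// Metadata attachments are keyed by their kind string. A consumer might probe
// for a DXIL-style attachment like so (sketch; the kind name and the
// mark_non_uniform helper are illustrative only):
//   if (MDNode *node = inst->getMetadata("dx.nonuniform"))
//       mark_non_uniform(inst, node);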
class ReturnInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Return; }
    explicit ReturnInst(Value *value);
    Value *getReturnValue() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *value;
};

class UnreachableInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Unreachable; }
    UnreachableInst();
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class CallInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Call; }
    CallInst(FunctionType *function_type, Function *callee, Vector<Value *> params);
    Function *getCalledFunction() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Function *callee;
};

class UnaryOperator : public Instruction
{
public:
    enum class UnaryOps
    {
        Invalid,
        FNeg,
        INeg, // custom extension
    };

    static constexpr ValueKind get_value_kind() { return ValueKind::UnaryOperator; }
    UnaryOperator(UnaryOps uop, Value *value);
    UnaryOps getOpcode() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    UnaryOps op;
};

class BinaryOperator : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::BinaryOperator; }
    BinaryOperator(Value *LHS, Value *RHS, BinaryOps op);
    BinaryOps getOpcode() const;
    void setFast(bool enabled);
    bool isFast() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    BinaryOps op;
    bool fast_math = false;
};

class CastInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Cast; }
    CastInst(Type *type, Value *value, Instruction::CastOps op);
    Instruction::CastOps getOpcode() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Instruction::CastOps op;
};

class SelectInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Select; }
    SelectInst(Value *true_value, Value *false_value, Value *cond);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class ExtractValueInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ExtractValue; }
    ExtractValueInst(Type *type, Value *aggregate, Vector<unsigned> indices);
    Value *getAggregateOperand() const;
    unsigned getNumIndices() const;
    const unsigned *getIndices() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Vector<unsigned> indices;
};

class AllocaInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Alloca; }
    AllocaInst(Type *pointer_type, Type *element_type, Value *size);
    Value *getArraySize() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *array_size;
};

class GetElementPtrInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::GetElementPtr; }
    GetElementPtrInst(Type *pointer_type, Vector<Value *> arguments, bool inbounds);
    bool isInBounds() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    bool inbounds;
};

class LoadInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Load; }
    LoadInst(Type *type, Value *ptr);
    Value *getPointerOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class StoreInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Store; }
    StoreInst(Value *ptr, Value *value);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class CmpInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::CompareBase; }
    CmpInst(ValueKind kind, Predicate pred, Value *LHS, Value *RHS);
    Predicate getPredicate() const;
    static bool is_base_of_value_kind(ValueKind kind);

private:
    Predicate pred;
};

class FCmpInst : public CmpInst
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::FCmp; }
    FCmpInst(Predicate pred, Value *LHS, Value *RHS);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class ICmpInst : public CmpInst
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ICmp; }
    ICmpInst(Predicate pred, Value *LHS, Value *RHS);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class BranchInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Branch; }
    BranchInst(BasicBlock *true_block, BasicBlock *false_block, Value *cond);
    explicit BranchInst(BasicBlock *true_block);
    bool isConditional() const;
    Value *getCondition() const;
    unsigned getNumSuccessors() const;
    BasicBlock *getSuccessor(unsigned index) const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    BasicBlock *bbs[2] = {};
    unsigned num_blocks = 0;
    Value *cond = nullptr;
};

class SwitchInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Switch; }
    SwitchInst(Value *cond, BasicBlock *default_block, unsigned num_cases);
    void addCase(Value *case_value, BasicBlock *bb);

    struct Case
    {
        Value *value;
        BasicBlock *bb;
        BasicBlock *getCaseSuccessor() const;
        ConstantInt *getCaseValue() const;
    };

    Vector<Case>::const_iterator case_begin() const;
    Vector<Case>::const_iterator case_end() const;
    Value *getCondition() const;
    BasicBlock *getDefaultDest() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *cond;
    BasicBlock *default_block;
    Vector<Case> cases;
};
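// LLVMBC_DEFAULT_VALUE_KIND_IMPL (see value.hpp) wires each subclass into the
// custom RTTI from bc/cast.hpp, which dispatches on get_value_kind(). Typical
// dispatch over a terminator (sketch; the handlers are hypothetical):
//   if (auto *branch = dyn_cast<BranchInst>(terminator))
//       handle_branch(branch);
//   else if (auto *sw = dyn_cast<SwitchInst>(terminator))
//       handle_switch(sw);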
class PHINode : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::PHI; }
    PHINode(Type *type, size_t num_edges);
    unsigned getNumIncomingValues() const;
    Value *getIncomingValue(unsigned index) const;
    BasicBlock *getIncomingBlock(unsigned index) const;
    void add_incoming(Value *value, BasicBlock *bb);
    bool resolve_proxy_values_incoming();
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    struct Incoming
    {
        Value *value;
        BasicBlock *bb;
    };
    Vector<Incoming> incoming;
};

class AtomicRMWInst : public Instruction
{
public:
    enum class BinOp
    {
        Invalid,
        Xchg, Add, Sub, And, Nand, Or, Xor,
        Max, Min, UMax, UMin,
        FAdd, // wat
        FSub
    };

    static constexpr ValueKind get_value_kind() { return ValueKind::AtomicRMW; }
    AtomicRMWInst(Type *type, Value *ptr, Value *value, BinOp op);
    Value *getPointerOperand() const;
    Value *getValOperand() const;
    BinOp getOperation() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *ptr;
    Value *value;
    BinOp op;
};

class AtomicCmpXchgInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::AtomicCmpXchg; }
    AtomicCmpXchgInst(Value *ptr, Value *cmp, Value *new_value, Type *type_override = nullptr);
    Value *getPointerOperand() const;
    Value *getNewValOperand() const;
    Value *getCompareOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *ptr;
    Value *new_value;
    Value *cmp_value;
};

class ShuffleVectorInst : public Instruction
{
public:
    ShuffleVectorInst(Type *type, Value *a, Value *b, Value *shuf);
    static constexpr ValueKind get_value_kind() { return ValueKind::ShuffleVector; }
    int getMaskValue(unsigned index) const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Vector<int> shuffle_mask;
};

class ExtractElementInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ExtractElement; }
    ExtractElementInst(Value *vec, Value *offset);
    Value *getVectorOperand() const;
    Value *getIndexOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *vec;
    Value *index;
};

class InsertElementInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::InsertElement; }
    InsertElementInst(Value *vec, Value *value, Value *index);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

// Extension of LLVM to better map to SPIR-V / DXBC-IR
class CompositeConstructInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::CompositeConstruct; }
    CompositeConstructInst(Type *type, Vector<Value *> constituents);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};
} // namespace LLVMBC

================================================
FILE: bc/iterator.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include <stddef.h>

namespace LLVMBC
{
// An iterator adaptor which lets us receive reference types instead of pointer types.
template <typename T, typename Iter>
struct IteratorAdaptor
{
    IteratorAdaptor(Iter iter_)
        : iter(iter_)
    {
    }

    T &operator*() { return **iter; }
    T *operator->() { return *iter; }

    IteratorAdaptor operator++()
    {
        ++iter;
        return *this;
    }

    bool operator==(const IteratorAdaptor &other) const { return iter == other.iter; }
    bool operator!=(const IteratorAdaptor &other) const { return !(*this == other); }
    ptrdiff_t operator-(const IteratorAdaptor &other) const { return iter - other.iter; }

    Iter iter;
};
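// With this adaptor, containers of Instruction pointers iterate as references,
// which is what makes loops like the one in module.cpp read naturally:
//   for (auto &inst : *bb)          // Instruction &, not Instruction *
//       inst.resolve_proxy_values();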
} // namespace LLVMBC

================================================
FILE: bc/metadata.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "metadata.hpp"
#include "module.hpp"
#include "value.hpp"
#include <assert.h>
#include <utility>

namespace LLVMBC
{
MDOperand::MDOperand(Module *parent_, MetadataKind kind_)
    : parent(parent_)
    , kind(kind_)
{
}

MDOperand::MDOperand(Module *parent_)
    : parent(parent_)
{
}

Module *MDOperand::getParent() const { return parent; }
MetadataKind MDOperand::get_metadata_kind() const { return kind; }

MDNode::MDNode(Module *module, Vector<MDOperand *> operands_)
    : MDOperand(module, MetadataKind::Node)
    , operands(std::move(operands_))
{
}

unsigned MDNode::getNumOperands() const { return unsigned(operands.size()); }

MDOperand &MDNode::getOperand(unsigned index) const
{
    assert(index < operands.size());
    return *operands[index];
}

uint64_t MDNode::get_tween_id() const { return tween; }
void MDNode::set_tween_id(uint64_t id) { tween = id; }

NamedMDNode::NamedMDNode(Module *module, String name_, Vector<MDNode *> operands_)
    : MDOperand(module, MetadataKind::NamedNode)
    , name(std::move(name_))
    , operands(std::move(operands_))
{
}

unsigned NamedMDNode::getNumOperands() const { return unsigned(operands.size()); }

MDNode *NamedMDNode::getOperand(unsigned index) const
{
    assert(index < operands.size());
    return operands[index];
}

const String &NamedMDNode::getName() const { return name; }

ConstantAsMetadata::ConstantAsMetadata(Module *module, Constant *value_)
    : MDOperand(module, MetadataKind::Constant)
    , value(value_)
{
}

Constant *ConstantAsMetadata::getValue() const { return value; }

MDString::MDString(LLVMBC::Module *module, String str_)
    : MDOperand(module, MetadataKind::String)
    , str(std::move(str_))
{
}

const String &MDString::getString() const { return str; }
} // namespace LLVMBC

================================================
FILE: bc/metadata.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include "data_structures.hpp"

namespace LLVMBC
{
class Value;
class Module;
class Constant;

enum class MetadataKind
{
    NamedNode,
    Node,
    Constant,
    String,
    None
};

class MDOperand
{
public:
    explicit MDOperand(Module *parent);
    MDOperand(Module *parent, MetadataKind kind);
    Module *getParent() const;
    MetadataKind get_metadata_kind() const;

    explicit operator bool() const
    {
        return kind != MetadataKind::None;
    }

private:
    Module *parent;
    MetadataKind kind = MetadataKind::None;
};

class MDNode : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::Node; }
    MDNode(Module *module, Vector<MDOperand *> operands);
    MDOperand &getOperand(unsigned index) const;
    unsigned getNumOperands() const;
    void set_tween_id(uint64_t id);
    uint64_t get_tween_id() const;

private:
    Vector<MDOperand *> operands;
    uint64_t tween = 0;
};

class NamedMDNode : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::NamedNode; }
    NamedMDNode(Module *module, String name, Vector<MDNode *> operands);
    const String &getName() const;
    MDNode *getOperand(unsigned index) const;
    unsigned getNumOperands() const;

private:
    String name;
    Vector<MDNode *> operands;
};

class ConstantAsMetadata : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::Constant; }
    ConstantAsMetadata(Module *module, Constant *value);
    Constant *getValue() const;

private:
    Constant *value;
};

class MDString : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::String; }
    MDString(Module *module, String str);
    const String &getString() const;

private:
    String str;
};
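// Each operand class exposes a static get_metadata_kind() tag, mirroring the
// value hierarchy, so dyn_cast-style probing works on metadata too. Reading a
// string operand out of a node might look like (sketch; use_name is a
// hypothetical consumer):
//   if (auto *str = dyn_cast<MDString>(&node->getOperand(0)))
//       use_name(str->getString());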
} // namespace LLVMBC

================================================
FILE: bc/module.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "module.hpp"
#include "cast.hpp"
#include "context.hpp"
#include "function.hpp"
#include "instruction.hpp"
#include "logging.hpp"
#include "metadata.hpp"
#include "type.hpp"
#include "value.hpp"
#include <algorithm>

#include "llvm_decoder.h"

namespace LLVMBC
{
enum class KnownBlocks : uint32_t
{
    BLOCKINFO = 0,
    // 1-7 reserved,
    MODULE_BLOCK = 8,
    PARAMATTR_BLOCK = 9,
    PARAMATTR_GROUP_BLOCK = 10,
    CONSTANTS_BLOCK = 11,
    FUNCTION_BLOCK = 12,
    TYPE_SYMTAB_BLOCK = 13,
    VALUE_SYMTAB_BLOCK = 14,
    METADATA_BLOCK = 15,
    METADATA_ATTACHMENT = 16,
    TYPE_BLOCK = 17,
};

enum class AttributeRecord : uint32_t
{
    NONE = 0,
    ALIGNMENT = 1,
    BY_VAL = 3,
    STACK_ALIGNMENT = 25,
    DEREFERENCEABLE = 41,
    DEREFERENCEABLE_OR_NULL = 42,
    ALLOC_SIZE = 51
};

enum class ModuleRecord : uint32_t
{
    VERSION = 1,
    TRIPLE = 2,
    DATALAYOUT = 3,
    GLOBAL_VARIABLE = 7,
    FUNCTION = 8,
};

enum class ConstantsRecord : uint32_t
{
    SETTYPE = 1, CONST_NULL = 2, UNDEF = 3, INTEGER = 4, WIDE_INTEGER = 5,
    FLOAT = 6, AGGREGATE = 7, STRING = 8, BINOP = 10, CE_CAST = 11,
    GEP = 12, INBOUNDS_GEP = 20, DATA = 22, GEP_WITH_INRANGE_INDEX = 24
};

enum class FunctionRecord : uint32_t
{
    DECLAREBLOCKS = 1, INST_BINOP = 2, INST_CAST = 3, INST_GEP_OLD = 4,
    INST_SELECT = 5, INST_EXTRACTELT = 6, INST_INSERTELT = 7, INST_SHUFFLEVEC = 8,
    INST_CMP = 9, INST_RET = 10, INST_BR = 11, INST_SWITCH = 12, INST_INVOKE = 13,
    INST_UNREACHABLE = 15, INST_PHI = 16, INST_ALLOCA = 19, INST_LOAD = 20,
    INST_VAARG = 23, INST_STORE_OLD = 24, INST_EXTRACTVAL = 26, INST_INSERTVAL = 27,
    INST_CMP2 = 28, INST_VSELECT = 29, INST_INBOUNDS_GEP_OLD = 30, INST_INDIRECTBR = 31,
    DEBUG_LOC_AGAIN = 33, INST_CALL = 34, DEBUG_LOC = 35, INST_FENCE = 36,
    INST_CMPXCHG_OLD = 37, INST_ATOMICRMW = 38, INST_RESUME = 39, INST_LANDINGPAD_OLD = 40,
    INST_LOADATOMIC = 41, INST_STOREATOMIC_OLD = 42, INST_GEP = 43, INST_STORE = 44,
    INST_STOREATOMIC = 45, INST_CMPXCHG = 46, INST_LANDINGPAD = 47, INST_CLEANUPRET = 48,
    INST_CATCHRET = 49, INST_CATCHPAD = 50, INST_CLEANUPPAD = 51, INST_CATCHSWITCH = 52,
    OPERAND_BUNDLE = 55, INST_UNOP = 56, INST_CALLBR = 57,
};

enum class ValueSymtabRecord : uint32_t
{
    ENTRY = 1,
    BBENTRY = 2,
    FNENTRY = 3,
    COMBINED_ENTRY = 5,
};

enum class MetaDataRecord : uint32_t
{
    STRING_OLD = 1, VALUE = 2, NODE = 3, NAME = 4, DISTINCT_NODE = 5, KIND = 6,
    LOCATION = 7, OLD_NODE = 8, OLD_FN_NODE = 9, NAMED_NODE = 10, ATTACHMENT = 11,
    GENERIC_DEBUG = 12, SUBRANGE = 13, ENUMERATOR = 14, BASIC_TYPE = 15, FILE = 16,
    DERIVED_TYPE = 17, COMPOSITE_TYPE = 18, SUBROUTINE_TYPE = 19, COMPILE_UNIT = 20,
    SUBPROGRAM = 21, LEXICAL_BLOCK = 22, LEXICAL_BLOCK_FILE = 23, NAMESPACE = 24,
    TEMPLATE_TYPE = 25, TEMPLATE_VALUE = 26, GLOBAL_VAR = 27, LOCAL_VAR = 28,
    EXPRESSION = 29, OBJC_PROPERTY = 30, IMPORTED_ENTITY = 31, MODULE = 32,
    MACRO = 33, MACRO_FILE = 34, STRINGS = 35, GLOBAL_DECL_ATTACHMENT = 36,
    GLOBAL_VAR_EXPR = 37, INDEX_OFFSET = 38, INDEX = 39, LABEL = 40,
    COMMON_BLOCK = 44,
};
enum class AttributeCodes : uint32_t
{
    CodeEntryOld = 1,
    CodeEntry = 2,
    GroupCodeEntry = 3
};

enum class TypeRecord : uint32_t
{
    NUMENTRY = 1, VOID_TYPE = 2, FLOAT = 3, DOUBLE = 4, LABEL = 5, OPAQUE_TYPE = 6,
    INTEGER = 7, POINTER = 8, FUNCTION_OLD = 9, HALF = 10, ARRAY = 11, VECTOR = 12,
    METADATA = 16, STRUCT_ANON = 18, STRUCT_NAME = 19, STRUCT_NAMED = 20,
    FUNCTION = 21, TOKEN = 22,
};

enum class UnaryOp : uint32_t
{
    FNEG = 0
};

enum class BinOp : uint32_t
{
    ADD = 0, SUB = 1, MUL = 2, UDIV = 3, SDIV = 4, UREM = 5, SREM = 6,
    SHL = 7, LSHR = 8, ASHR = 9, AND = 10, OR = 11, XOR = 12
};

enum class AtomicBinOp : uint32_t
{
    RMW_XCHG = 0, RMW_ADD = 1, RMW_SUB = 2, RMW_AND = 3, RMW_NAND = 4,
    RMW_OR = 5, RMW_XOR = 6, RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9,
    RMW_UMIN = 10, RMW_FADD = 11, RMW_FSUB = 12
};

enum class CastOp : uint32_t
{
    TRUNC = 0, ZEXT = 1, SEXT = 2, FPTOUI = 3, FPTOSI = 4, UITOFP = 5,
    SITOFP = 6, FPTRUNC = 7, FPEXT = 8, PTRTOINT = 9, INTTOPTR = 10,
    BITCAST = 11, ADDSPACECAST = 12
};

enum CallFlagBits
{
    CALL_TAIL_BIT = 1 << 0,
    CALL_CCONV_BIT = 1 << 1,
    CALL_MUSTTAIL_BIT = 1 << 14,
    CALL_EXPLICIT_TYPE_BIT = 1 << 15,
    CALL_NOTAIL_BIT = 1 << 16,
    CALL_FMF_BIT = 1 << 17
};

enum FastMathFlagBits
{
    FAST_MATH_UNSAFE_ALGEBRA_BIT = 1 << 0,
    FAST_MATH_ALLOW_CONTRACT_BIT = 1 << 5
};

static int64_t decode_sign_rotated_value(uint64_t v)
{
    bool sign = (v & 1) != 0;
    v >>= 1;
    if (sign)
        v = v ? -int64_t(v) : (1ull << 63u);
    return int64_t(v);
}
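// The sign-rotated scheme stores x >= 0 as 2 * x and x < 0 as 2 * (-x) + 1,
// reserving the pattern with only the sign bit set for INT64_MIN. Examples:
//   decode_sign_rotated_value(6) == 3
//   decode_sign_rotated_value(7) == -3
//   decode_sign_rotated_value(1) == int64_t(1ull << 63)  // INT64_MIN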
struct ModuleParseContext
{
    Function *function = nullptr;
    Module *module = nullptr;
    LLVMContext *context = nullptr;
    Vector<BasicBlock *> basic_blocks;
    Vector<Value *> values;
    Vector<Instruction *> instructions;
    Vector<Type *> types;
    Vector<Function *> functions_with_bodies;
    UnorderedMap<uint64_t, MDOperand *> metadata;
    UnorderedMap<uint64_t, String> metadata_kind_map;
    Vector<Vector<std::pair<String, String>>> attribute_lists;
    UnorderedMap<uint64_t, Vector<std::pair<String, String>>> attribute_groups;
    Type *constant_type = nullptr;
    String current_metadata_name;

    bool parse_function_child_block(const BlockOrRecord &entry);
    bool parse_record(const BlockOrRecord &entry);
    bool parse_constants_record(const BlockOrRecord &entry);
    bool parse_constants_block(const BlockOrRecord &entry);
    bool parse_metadata_block(const BlockOrRecord &entry);
    bool parse_paramattr_block(const BlockOrRecord &entry);
    bool parse_paramattr_group_block(const BlockOrRecord &entry);
    bool parse_metadata_attachment_record(const BlockOrRecord &entry);
    bool parse_metadata_record(const BlockOrRecord &entry, unsigned index);
    Type *get_constant_type();
    bool parse_function_body(const BlockOrRecord &entry);
    bool parse_types(const BlockOrRecord &entry);
    bool parse_value_symtab(const BlockOrRecord &entry);
    bool parse_function_record(const BlockOrRecord &entry);
    bool parse_global_variable_record(const BlockOrRecord &entry);
    bool parse_version_record(const BlockOrRecord &entry);
    bool parse_type(const BlockOrRecord &entry);
    bool add_instruction(Instruction *inst);
    bool add_value(Value *value);
    bool add_type(Type *type);
    Type *get_type(uint64_t index);

    bool finish_basic_block();
    bool add_successor(BasicBlock *bb);
    BasicBlock *get_basic_block(uint64_t index) const;
    BasicBlock *current_bb = nullptr;
    unsigned basic_block_index = 0;

    Value *get_value(uint64_t op, Type *expected_type = nullptr, bool force_absolute = false);
    std::pair<Value *, Type *> get_value_and_type(const Vector<uint64_t> &ops, unsigned &index);
    Value *get_value(const Vector<uint64_t> &ops, unsigned &index, Type *expected_type);
    Value *get_value_signed(uint64_t op, Type *expected_type = nullptr);
    MDOperand *get_metadata(uint64_t index) const;
    const char *get_metadata_kind(uint64_t index) const;
    Instruction *get_instruction(uint64_t index) const;

    Vector<ValueProxy *> pending_forward_references;
    Vector<std::pair<GlobalVariable *, uint64_t>> global_initializations;
    bool resolve_forward_references();
    bool resolve_global_initializations();

    uint64_t tween_id = 1;
    uint64_t metadata_tween_id = 1;
    bool use_relative_id = true;
    bool use_strtab = false;
    bool seen_first_function_body = false;
};

ValueProxy::ValueProxy(Type *type, ModuleParseContext &context_, uint64_t id_)
    : Value(type, ValueKind::Proxy)
    , id(id_)
    , context(context_)
{
}

bool ValueProxy::resolve()
{
    if (proxy)
        return true;
    if (id >= context.values.size())
    {
        LOGE("Value proxy is out of range.\n");
        return false;
    }

    proxy = context.values[id];
    while (proxy && proxy->get_value_kind() == ValueKind::Proxy)
    {
        cast<ValueProxy>(proxy)->resolve();
        proxy = cast<ValueProxy>(proxy)->get_proxy_value();
    }

    if (!proxy)
    {
        LOGE("Failed to resolve proxy value.\n");
        return false;
    }
    return true;
}

Value *ValueProxy::get_proxy_value() const { return proxy; }

bool ModuleParseContext::finish_basic_block()
{
    basic_block_index++;
    if (basic_block_index >= basic_blocks.size())
        current_bb = nullptr;
    else
    {
        current_bb = basic_blocks[basic_block_index];
        current_bb->set_tween_id(tween_id++);
    }
    return true;
}

bool ModuleParseContext::add_successor(BasicBlock *bb)
{
    if (!current_bb)
    {
        LOGE("No basic block is active in add_successor().\n");
        return false;
    }
    current_bb->add_successor(bb);
    return true;
}

BasicBlock *ModuleParseContext::get_basic_block(uint64_t index) const
{
    if (index >= basic_blocks.size())
    {
        LOGE("Basic block index is out of bounds!\n");
        return nullptr;
    }
    return basic_blocks[index];
}

Value *ModuleParseContext::get_value(uint64_t op, Type *expected_type, bool force_absolute)
{
    if (!force_absolute && use_relative_id)
        op = uint32_t(values.size() - op);

    if (op >= values.size())
    {
        if (!expected_type)
        {
            LOGE("Must have an expected type for forward references!\n");
            return nullptr;
        }
        auto *proxy = context->construct<ValueProxy>(expected_type, *this, op);
        pending_forward_references.push_back(proxy);
        return proxy;
    }
    else
    {
        // A pointer to function and a constant function do match.
        if (auto *ptr_type = dyn_cast<PointerType>(expected_type))
            if (ptr_type->getPointerElementType()->getTypeID() == Type::TypeID::FunctionTyID)
                expected_type = ptr_type->getPointerElementType();

        if (expected_type && expected_type != values[op]->getType())
        {
            LOGE("Type mismatch.\n");
            return nullptr;
        }
        return values[op];
    }
}

Value *ModuleParseContext::get_value(const Vector<uint64_t> &ops, unsigned &index, Type *expected_type)
{
    if (index >= ops.size())
        return nullptr;
    return get_value(ops[index++], expected_type);
}

std::pair<Value *, Type *> ModuleParseContext::get_value_and_type(const Vector<uint64_t> &ops, unsigned &index)
{
    if (index >= ops.size())
        return {};

    uint64_t op = ops[index++];
    if (use_relative_id)
        op = uint32_t(values.size() - op);

    if (op < values.size())
    {
        // Normal reference.
        return { values[op], values[op]->getType() };
    }
    else
    {
        // Forward reference, the type is encoded in the next element.
        if (index >= ops.size())
            return {};
        auto *type = get_type(ops[index++]);
        auto *proxy = context->construct<ValueProxy>(type, *this, op);
        pending_forward_references.push_back(proxy);
        return { proxy, type };
    }
}

Instruction *ModuleParseContext::get_instruction(uint64_t index) const
{
    if (index >= instructions.size())
    {
        LOGE("Instruction index is out of range!\n");
        return nullptr;
    }
    return instructions[index];
}
MDOperand *ModuleParseContext::get_metadata(uint64_t index) const
{
    auto itr = metadata.find(index);
    if (itr != metadata.end())
        return itr->second;
    else
    {
        // Need to return a null-node like this since MDOperand is used as a
        // reference in the LLVM API for some reason.
        return context->construct<MDOperand>(module);
    }
}

const char *ModuleParseContext::get_metadata_kind(uint64_t index) const
{
    auto itr = metadata_kind_map.find(index);
    if (itr != metadata_kind_map.end())
        return itr->second.c_str();
    else
        return nullptr;
}

Value *ModuleParseContext::get_value_signed(uint64_t op, Type *expected_type)
{
    int64_t signed_op = decode_sign_rotated_value(op);
    if (use_relative_id)
        signed_op = values.size() - signed_op;
    op = signed_op;

    if (op >= values.size())
    {
        if (!expected_type)
        {
            LOGE("Must have an expected type for forward references!\n");
            return nullptr;
        }
        auto *proxy = context->construct<ValueProxy>(expected_type, *this, op);
        pending_forward_references.push_back(proxy);
        return proxy;
    }
    else
        return values[op];
}

bool ModuleParseContext::add_instruction(Instruction *inst)
{
    instructions.push_back(inst);
    if (current_bb)
        current_bb->add_instruction(inst);
    else
    {
        LOGE("No basic block is currently set!\n");
        return false;
    }

    if (inst->isTerminator())
        return finish_basic_block();
    else
        return add_value(inst);
}

bool ModuleParseContext::add_value(Value *value)
{
    if (value->getType()->getTypeID() != Type::TypeID::VoidTyID)
    {
        value->set_tween_id(tween_id++);
        values.push_back(value);
    }
    return true;
}

Type *ModuleParseContext::get_constant_type()
{
    if (constant_type)
        return constant_type;
    else
        return Type::getInt32Ty(*context);
}

static Type *resolve_gep_element_type(Type *type, const Vector<Value *> &args)
{
    for (unsigned i = 2; i < args.size(); i++)
    {
        auto *arg = args[i];
        if (type->getTypeID() == Type::TypeID::StructTyID)
        {
            auto *const_int = dyn_cast<ConstantInt>(arg);
            if (!const_int)
            {
                LOGE("Indexing into a struct without a constant integer.\n");
                return nullptr;
            }

            unsigned index = const_int->getUniqueInteger().getZExtValue();
            if (index >= cast<StructType>(type)->getNumElements())
            {
                LOGE("Struct element index out of range.\n");
                return nullptr;
            }
            type = cast<StructType>(type)->getElementType(index);
        }
        else if (type->getTypeID() == Type::TypeID::ArrayTyID)
        {
            type = type->getArrayElementType();
        }
        else if (type->getTypeID() == Type::TypeID::VectorTyID)
        {
            type = cast<VectorType>(type)->getElementType();
        }
        else
            return nullptr;
    }
    return type;
}
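// resolve_gep_element_type peels one type layer per index beyond the first two
// operands (base pointer and initial index). For instance, a GEP into a
// { [4 x float], i32 }* with indices (0, 0, i) walks
// struct -> [4 x float] -> float, and the caller then wraps the result back
// into a float* carrying the original address space.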
static BinaryOperator::BinaryOps translate_binop(BinOp op, Type *type)
{
    bool is_fp = type->isFloatingPointTy();
    switch (op)
    {
    case BinOp::ADD:
        return is_fp ? BinaryOperator::BinaryOps::FAdd : BinaryOperator::BinaryOps::Add;
    case BinOp::SUB:
        return is_fp ? BinaryOperator::BinaryOps::FSub : BinaryOperator::BinaryOps::Sub;
    case BinOp::MUL:
        return is_fp ? BinaryOperator::BinaryOps::FMul : BinaryOperator::BinaryOps::Mul;
    case BinOp::UDIV:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::UDiv;
    case BinOp::SDIV:
        return is_fp ? BinaryOperator::BinaryOps::FDiv : BinaryOperator::BinaryOps::SDiv;
    case BinOp::UREM:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::URem;
    case BinOp::SREM:
        return is_fp ? BinaryOperator::BinaryOps::FRem : BinaryOperator::BinaryOps::SRem;
    case BinOp::SHL:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Shl;
    case BinOp::LSHR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::LShr;
    case BinOp::ASHR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::AShr;
    case BinOp::AND:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::And;
    case BinOp::OR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Or;
    case BinOp::XOR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Xor;
    default:
        return BinaryOperator::BinaryOps::InvalidBinaryOp;
    }
}

static Instruction::CastOps translate_castop(CastOp op)
{
    switch (op)
    {
    case CastOp::TRUNC: return Instruction::Trunc;
    case CastOp::ZEXT: return Instruction::ZExt;
    case CastOp::SEXT: return Instruction::SExt;
    case CastOp::FPTOUI: return Instruction::FPToUI;
    case CastOp::FPTOSI: return Instruction::FPToSI;
    case CastOp::UITOFP: return Instruction::UIToFP;
    case CastOp::SITOFP: return Instruction::SIToFP;
    case CastOp::FPTRUNC: return Instruction::FPTrunc;
    case CastOp::FPEXT: return Instruction::FPExt;
    case CastOp::PTRTOINT: return Instruction::PtrToInt;
    case CastOp::INTTOPTR: return Instruction::IntToPtr;
    case CastOp::BITCAST: return Instruction::BitCast;
    case CastOp::ADDSPACECAST: return Instruction::AddrSpaceCast;
    }
    return Instruction::CastOps::InvalidCastOp;
}

bool ModuleParseContext::parse_constants_record(const BlockOrRecord &entry)
{
    if (entry.IsBlock())
        return true;

    switch (ConstantsRecord(entry.id))
    {
    case ConstantsRecord::SETTYPE:
        if (entry.ops.size() < 1)
            return false;
        constant_type = get_type(entry.ops[0]);
        break;

    case ConstantsRecord::CONST_NULL:
    {
        auto *type = get_constant_type();
        Value *value = nullptr;
        if (type->isIntegerTy())
            value = ConstantInt::get(type, 0);
        else if (type->isFloatingPointTy())
            value = ConstantFP::get(type, 0);
        else if (isa<ArrayType>(type) || isa<StructType>(type) || isa<VectorType>(type))
            value = context->construct<ConstantAggregateZero>(type);
        else if (isa<PointerType>(type))
            value = context->construct<ConstantPointerNull>(type);

        if (!value)
        {
            LOGE("Unknown type for CONST_NULL.\n");
            return false;
        }
        values.push_back(value);
        break;
    }

    case ConstantsRecord::UNDEF:
    {
        auto *type = get_constant_type();
        values.push_back(UndefValue::get(type));
        break;
    }

    case ConstantsRecord::INTEGER:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_constant_type();
        if (!type->isIntegerTy())
        {
            LOGE("Constant type is not integer.\n");
            return false;
        }
        uint64_t literal = entry.ops[0];
        int64_t signed_literal = decode_sign_rotated_value(literal);
        ConstantInt *value = ConstantInt::get(type, signed_literal);
        values.push_back(value);
        break;
    }

    case ConstantsRecord::WIDE_INTEGER:
        LOGE("WIDE_INTEGER unimplemented.\n");
        return false;

    case ConstantsRecord::FLOAT:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_constant_type();
        if (!type->isFloatingPointTy())
        {
            LOGE("Constant type is not FP.\n");
            return false;
        }
        ConstantFP *value = ConstantFP::get(type, entry.ops[0]);
        values.push_back(value);
        break;
    }
    case ConstantsRecord::AGGREGATE:
    {
        Vector<Value *> constants;
        Value *value;
        constants.reserve(entry.ops.size());

        if (auto *struct_type = dyn_cast<StructType>(get_constant_type()))
        {
            if (entry.ops.size() != struct_type->getStructNumElements())
            {
                LOGE("Mismatch in struct element counts.\n");
                return false;
            }
            for (unsigned i = 0; i < struct_type->getStructNumElements(); i++)
                constants.push_back(get_value(entry.ops[i], struct_type->getStructElementType(i), true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else if (isa<ArrayType>(get_constant_type()))
        {
            auto *element_type = get_constant_type()->getArrayElementType();
            for (auto &op : entry.ops)
                constants.push_back(get_value(op, element_type, true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else if (isa<VectorType>(get_constant_type()))
        {
            auto *element_type = cast<VectorType>(get_constant_type())->getElementType();
            for (auto &op : entry.ops)
                constants.push_back(get_value(op, element_type, true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else
        {
            value = UndefValue::get(get_constant_type());
        }
        values.push_back(value);
        break;
    }

    case ConstantsRecord::STRING:
        LOGE("STRING unimplemented.\n");
        return false;

    case ConstantsRecord::BINOP:
    {
        unsigned index = 0;
        auto *type = get_constant_type();
        auto op = translate_binop(BinOp(entry.ops[index++]), type);
        auto *a = get_value(entry.ops[index++], type, true);
        auto *b = get_value(entry.ops[index++], type, true);
        auto elements = Vector<Value *>{ a, b };
        Value *value = context->construct<ConstantExpr>(op, type, std::move(elements));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::CE_CAST:
    {
        unsigned index = 0;
        auto op = translate_castop(CastOp(entry.ops[index++]));
        auto *type = get_constant_type();
        auto *input_value_type = get_type(entry.ops[index++]);
        if (!input_value_type)
            return false;
        auto *input_value = get_value(entry.ops[index++], input_value_type, true);
        auto elements = Vector<Value *>{ input_value };
        Value *value = context->construct<ConstantExpr>(op, type, std::move(elements));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::DATA:
    {
        bool is_vector = false;
        Type *element_type = nullptr;
        if (isa<ArrayType>(get_constant_type()))
            element_type = get_constant_type()->getArrayElementType();
        else if (isa<VectorType>(get_constant_type()))
        {
            element_type = cast<VectorType>(get_constant_type())->getElementType();
            is_vector = true;
        }
        else
        {
            LOGE("Unknown DATA type.\n");
            return false;
        }

        bool is_fp = element_type->isFloatingPointTy();
        bool is_int = element_type->isIntegerTy();

        Vector<Constant *> constants;
        constants.reserve(entry.ops.size());
        if (is_fp)
        {
            for (auto &op : entry.ops)
                constants.push_back(ConstantFP::get(element_type, op));
        }
        else if (is_int)
        {
            for (auto &op : entry.ops)
                constants.push_back(ConstantInt::get(element_type, op));
        }
        else
        {
            LOGE("Unknown DATA type.\n");
            return false;
        }

        Value *value;
        if (is_vector)
            value = context->construct<ConstantDataVector>(get_constant_type(), std::move(constants));
        else
            value = context->construct<ConstantDataArray>(get_constant_type(), std::move(constants));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::GEP:
    case ConstantsRecord::INBOUNDS_GEP:
    case ConstantsRecord::GEP_WITH_INRANGE_INDEX:
    {
        if (entry.ops.size() < 2)
            return false;

        Type *pointee_type = nullptr;
        unsigned index = 0;
        if (ConstantsRecord(entry.id) == ConstantsRecord::GEP_WITH_INRANGE_INDEX || (entry.ops.size() & 1))
            pointee_type = get_type(entry.ops[index++]);
        if (ConstantsRecord(entry.id) == ConstantsRecord::GEP_WITH_INRANGE_INDEX)
            index++;

        Vector<Value *> elements;
        elements.reserve(entry.ops.size() / 2);
        while (index < entry.ops.size())
        {
            auto *type = get_type(entry.ops[index++]);
            auto *value = get_value(entry.ops[index++], type, true);
            elements.push_back(value);
        }

        if (elements.size() < 2)
            return false;
        if (!pointee_type)
            pointee_type = elements[0]->getType()->getPointerElementType();
        pointee_type = resolve_gep_element_type(pointee_type, elements);
        if (!pointee_type)
            return false;
        pointee_type = PointerType::get(pointee_type, cast<PointerType>(elements[0]->getType())->getAddressSpace());

        auto *value = context->construct<ConstantExpr>(Instruction::GetElementPtr, pointee_type, std::move(elements));
        values.push_back(value);
        break;
    }

    default:
        LOGE("UNKNOWN unimplemented.\n");
        return false;
    }
    return true;
}

bool ModuleParseContext::parse_constants_block(const BlockOrRecord &entry)
{
    constant_type = nullptr;
    for (auto &child : entry.children)
        if (!parse_constants_record(child))
            return false;
    return true;
}
bool ModuleParseContext::parse_metadata_attachment_record(const BlockOrRecord &entry)
{
    if (MetaDataRecord(entry.id) != MetaDataRecord::ATTACHMENT)
        return true;
    if (entry.ops.size() < 1)
        return false;

    size_t size = entry.ops.size();
    size_t num_nodes = (size - 1) / 2;
    auto *inst = get_instruction(entry.ops[0]);
    if (!inst)
    {
        LOGE("Invalid instruction.\n");
        return false;
    }

    for (size_t i = 0; i < num_nodes; i++)
    {
        auto *kind = get_metadata_kind(entry.ops[2 * i + 1]);
        auto *operand = get_metadata(entry.ops[2 * i + 2]);
        auto *node = dyn_cast<MDNode>(operand);
        if (!kind)
        {
            LOGE("Invalid metadata kind.\n");
            return false;
        }
        if (!node)
        {
            LOGE("Invalid metadata attachment.\n");
            return false;
        }
        inst->setMetadata(kind, node);
    }
    return true;
}

bool ModuleParseContext::parse_metadata_record(const BlockOrRecord &entry, unsigned index)
{
    switch (MetaDataRecord(entry.id))
    {
    case MetaDataRecord::NAME:
        current_metadata_name = entry.getString();
        break;

    case MetaDataRecord::NAMED_NODE:
    {
        Vector<MDNode *> ops;
        ops.reserve(entry.ops.size());
        for (auto &op : entry.ops)
        {
            auto *md = get_metadata(op);
            auto *node = dyn_cast<MDNode>(md);
            ops.push_back(node);
        }
        auto *node = context->construct<NamedMDNode>(module, current_metadata_name, std::move(ops));
        module->add_named_metadata(current_metadata_name, node);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::DISTINCT_NODE:
    case MetaDataRecord::NODE:
    {
        Vector<MDOperand *> ops;
        ops.reserve(entry.ops.size());
        for (auto &op : entry.ops)
        {
            // For some reason, here metadata is indexed with -1?
            auto *md = get_metadata(op - 1);
            ops.push_back(md);
        }
        auto *node = context->construct<MDNode>(module, std::move(ops));
        node->set_tween_id(metadata_tween_id++);
        module->add_unnamed_metadata(node);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::STRING_OLD:
    {
        auto *node = context->construct<MDString>(module, entry.getString());
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::VALUE:
    {
        if (entry.ops.size() < 2)
            return false;
        auto *value = get_value(entry.ops[1], nullptr, true);
        if (!value)
        {
            LOGE("Null value!\n");
            return false;
        }
        auto *constant_value = dyn_cast<Constant>(value);
        if (!constant_value)
        {
            LOGE("Not a constant!\n");
            return false;
        }
        auto *node = context->construct<ConstantAsMetadata>(module, constant_value);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::KIND:
    {
        if (entry.ops.size() < 1)
            return false;
        metadata_kind_map[entry.ops[0]] = entry.getString(1);
        break;
    }

    default:
        break;
    }
    return true;
}

bool ModuleParseContext::parse_metadata_block(const BlockOrRecord &entry)
{
    unsigned index = 0;
    for (auto &child : entry.children)
        if (!parse_metadata_record(child, index++))
            return false;
    return true;
}
bool ModuleParseContext::parse_paramattr_block(const BlockOrRecord &entry)
{
    for (auto &child : entry.children)
    {
        if (!child.IsRecord())
            continue;
        // Don't support the OLD variant unless we observe it in the wild.
        // DXC doesn't generate it.
        if (AttributeCodes(child.id) != AttributeCodes::CodeEntry)
            return false;

        Vector<std::pair<String, String>> pairs;
        for (auto op : child.ops)
        {
            auto &grp = attribute_groups[op];
            for (auto &elem : grp)
                pairs.push_back(elem);
        }
        attribute_lists.push_back(std::move(pairs));
    }
    return true;
}

bool ModuleParseContext::parse_paramattr_group_block(const BlockOrRecord &entry)
{
    if (!attribute_groups.empty())
    {
        LOGE("Cannot use multiple group blocks.\n");
        return false;
    }

    for (auto &child : entry.children)
    {
        if (!child.IsRecord())
            continue;
        if (AttributeCodes(child.id) != AttributeCodes::GroupCodeEntry)
            continue;
        if (child.ops.size() < 3)
            return false;

        uint64_t group_id = child.ops[0];
        uint64_t index = child.ops[1];
        if (index != ~0u)
        {
            // Only care about attributes on function scope
            continue;
        }

        auto &attr_group = attribute_groups[group_id];

        size_t i = 2;
        size_t count = child.ops.size();
        while (i < count)
        {
            if (child.ops[i] == 0)
            {
                // Enum attribute, skip 2 values
                i += 2;
            }
            else if (child.ops[i] == 1)
            {
                // Integer attribute, skip 2 or 3 values
                i++;
                if (i >= count)
                    return false;
                switch (AttributeRecord(child.ops[i++]))
                {
                case AttributeRecord::ALIGNMENT:
                case AttributeRecord::STACK_ALIGNMENT:
                case AttributeRecord::ALLOC_SIZE:
                case AttributeRecord::DEREFERENCEABLE:
                case AttributeRecord::DEREFERENCEABLE_OR_NULL:
                    i++;
                    break;
                default:
                    break;
                }
            }
            else if (child.ops[i] == 3 || child.ops[i] == 4)
            {
                // String attribute
                bool has_value = child.ops[i++] == 4;
                String kind, value;

                // Bounds-check before dereferencing the terminator lookup.
                while (i < count && child.ops[i] != 0)
                    kind.push_back(char(child.ops[i++]));
                if (i >= count || child.ops[i] != 0)
                    return false;
                i++;

                if (has_value)
                {
                    while (i < count && child.ops[i] != 0)
                        value.push_back(char(child.ops[i++]));
                    if (i >= count || child.ops[i] != 0)
                        return false;
                    i++;
                }

                attr_group.emplace_back(std::move(kind), std::move(value));
            }
            else if (child.ops[i] == 5 || child.ops[i] == 6)
            {
                // Value attribute
                bool has_type = child.ops[i++] == 6;
                if (i >= count)
                    return false;
                if (AttributeRecord(child.ops[i++]) == AttributeRecord::BY_VAL && has_type)
                    i++;
            }
            else
                return false;
        }

        if (i > count)
            return false;
    }
    return true;
}

bool ModuleParseContext::parse_function_child_block(const BlockOrRecord &entry)
{
    switch (KnownBlocks(entry.id))
    {
    case KnownBlocks::CONSTANTS_BLOCK:
    {
        for (auto &child : entry.children)
            if (!parse_constants_record(child))
                return false;
        break;
    }

    case KnownBlocks::METADATA_ATTACHMENT:
    {
        for (auto &child : entry.children)
            if (!parse_metadata_attachment_record(child))
                return false;
        break;
    }

    default:
        break;
    }
    return true;
}

static UnaryOperator::UnaryOps translate_uop(UnaryOp op, Type *type)
{
    bool is_fp = type->isFloatingPointTy();
    if (op == UnaryOp::FNEG && is_fp)
        return UnaryOperator::UnaryOps::FNeg;
    else
        return UnaryOperator::UnaryOps::Invalid;
}
static AtomicRMWInst::BinOp translate_atomic_binop(AtomicBinOp op)
{
    switch (op)
    {
    case AtomicBinOp::RMW_XCHG: return AtomicRMWInst::BinOp::Xchg;
    case AtomicBinOp::RMW_ADD: return AtomicRMWInst::BinOp::Add;
    case AtomicBinOp::RMW_SUB: return AtomicRMWInst::BinOp::Sub;
    case AtomicBinOp::RMW_AND: return AtomicRMWInst::BinOp::And;
    case AtomicBinOp::RMW_NAND: return AtomicRMWInst::BinOp::Nand;
    case AtomicBinOp::RMW_OR: return AtomicRMWInst::BinOp::Or;
    case AtomicBinOp::RMW_XOR: return AtomicRMWInst::BinOp::Xor;
    case AtomicBinOp::RMW_MAX: return AtomicRMWInst::BinOp::Max;
    case AtomicBinOp::RMW_MIN: return AtomicRMWInst::BinOp::Min;
    case AtomicBinOp::RMW_UMAX: return AtomicRMWInst::BinOp::UMax;
    case AtomicBinOp::RMW_UMIN: return AtomicRMWInst::BinOp::UMin;
    case AtomicBinOp::RMW_FADD: return AtomicRMWInst::BinOp::FAdd;
    case AtomicBinOp::RMW_FSUB: return AtomicRMWInst::BinOp::FSub;
    default: return AtomicRMWInst::BinOp::Invalid;
    }
}

bool ModuleParseContext::parse_record(const BlockOrRecord &entry)
{
    switch (FunctionRecord(entry.id))
    {
    case FunctionRecord::DECLAREBLOCKS:
    {
        if (entry.ops.size() < 1)
            return false;
        basic_blocks.resize(entry.ops[0]);
        basic_block_index = 0;
        for (auto &bb : basic_blocks)
            bb = context->construct<BasicBlock>(*context);
        current_bb = basic_blocks.front();
        break;
    }

    case FunctionRecord::INST_CALL:
    {
        unsigned index = 1;
        if (index >= entry.ops.size())
            return false;
        auto CCInfo = entry.ops[index++];

        if (CCInfo & CALL_FMF_BIT)
        {
            if (index >= entry.ops.size())
                return false;
            auto fmf = entry.ops[index++];
            (void)fmf;
        }

        FunctionType *function_type = nullptr;
        if (CCInfo & CALL_EXPLICIT_TYPE_BIT)
        {
            if (index >= entry.ops.size())
                return false;
            function_type = cast<FunctionType>(get_type(entry.ops[index++]));
        }

        if (index >= entry.ops.size())
            return false;
        auto *callee = dyn_cast<Function>(get_value(entry.ops[index++]));
        if (!callee)
            return false;
        if (!function_type)
            function_type = callee->getFunctionType();
        if (!function_type)
            return false;

        unsigned num_params = function_type->getNumParams();
        if (entry.ops.size() != index + num_params)
        {
            LOGE("Number of params does not match record.\n");
            return false;
        }

        Vector<Value *> params;
        params.reserve(num_params);
        for (unsigned i = 0; i < num_params; i++)
        {
            auto *arg = get_value(entry.ops[index + i], function_type->getParamType(i));
            if (!arg)
                return false;
            params.push_back(arg);
        }

        auto *value = context->construct<CallInst>(function_type, callee, std::move(params));
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_RET:
    {
        Value *return_val = !entry.ops.empty() ? get_value(entry.ops[0]) : nullptr;
        auto *ret = context->construct<ReturnInst>(return_val);
        if (!add_instruction(ret))
            return false;
        break;
    }

    case FunctionRecord::INST_UNREACHABLE:
    {
        auto *ret = context->construct<UnreachableInst>();
        if (!add_instruction(ret))
            return false;
        break;
    }

    case FunctionRecord::INST_UNOP:
    {
        unsigned index = 0;
        auto val = get_value_and_type(entry.ops, index);
        if (!val.first)
            return false;
        if (index == entry.ops.size())
            return false;
        auto op = UnaryOp(entry.ops[index++]);
        auto *value = context->construct<UnaryOperator>(translate_uop(op, val.second), val.first);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CMP:
    case FunctionRecord::INST_CMP2:
    {
        unsigned index = 0;
        auto lhs = get_value_and_type(entry.ops, index);
        if (!lhs.first)
            return false;
        auto *rhs = get_value(entry.ops, index, lhs.second);
        if (index == entry.ops.size())
            return false;
        auto pred = Instruction::Predicate(entry.ops[index++]);
        if (!rhs)
            return false;

        Instruction *value = nullptr;
        if (lhs.second->isFloatingPointTy())
            value = context->construct<FCmpInst>(pred, lhs.first, rhs);
        else
            value = context->construct<ICmpInst>(pred, lhs.first, rhs);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_PHI:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_type(entry.ops[0]);
        size_t num_args = (entry.ops.size() - 1) / 2;
        auto *phi_node = context->construct<PHINode>(type, num_args);

        for (size_t i = 0; i < num_args; i++)
        {
            Value *value = nullptr;
            if (use_relative_id)
                value = get_value_signed(entry.ops[2 * i + 1], type);
            else
                value = get_value(entry.ops[2 * i + 1], type);
            BasicBlock *bb = get_basic_block(entry.ops[2 * i + 2]);
            if (!value || !bb)
                return false;
            phi_node->add_incoming(value, bb);
        }
        if (!add_instruction(phi_node))
            return false;
        break;
    }
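    // Operand IDs in function records are normally relative: the stored number
    // is subtracted from values.size() at decode time, so 1 means "the most
    // recently defined value". PHI incoming values use the sign-rotated form
    // instead (see get_value_signed), since they may legitimately reference
    // values defined later in the function, e.g. across a loop back-edge.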
    case FunctionRecord::INST_BINOP:
    {
        unsigned index = 0;
        auto lhs = get_value_and_type(entry.ops, index);
        if (!lhs.first)
            return false;
        auto *rhs = get_value(entry.ops, index, lhs.second);
        if (!lhs.first || !rhs)
            return false;
        if (index == entry.ops.size())
            return false;
        auto op = BinOp(entry.ops[index++]);
        auto *value = context->construct<BinaryOperator>(lhs.first, rhs, translate_binop(op, lhs.second));

        if (index < entry.ops.size())
        {
            // Only relevant for FP math, but we only look at fast math state for
            // FP operations anyways.
            auto fast_math_flags = entry.ops[index];
            bool fast = (fast_math_flags & (FAST_MATH_UNSAFE_ALGEBRA_BIT | FAST_MATH_ALLOW_CONTRACT_BIT)) != 0;
            value->setFast(fast);
        }
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_ATOMICRMW:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        if (!ptr.first || !isa<PointerType>(ptr.second))
            return false;
        auto *val = get_value(entry.ops, index, ptr.second->getPointerElementType());
        if (!val)
            return false;
        if (index == entry.ops.size())
            return false;
        AtomicRMWInst::BinOp op = translate_atomic_binop(AtomicBinOp(entry.ops[index++]));
        auto *value = context->construct<AtomicRMWInst>(val->getType(), ptr.first, val, op);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CMPXCHG:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        auto cmp = get_value_and_type(entry.ops, index);
        if (!ptr.first || !cmp.first || !isa<PointerType>(ptr.second))
            return false;
        auto *new_value = get_value(entry.ops, index, cmp.second);
        auto *value = context->construct<AtomicCmpXchgInst>(ptr.first, cmp.first, new_value);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CAST:
    {
        unsigned index = 0;
        auto input_value = get_value_and_type(entry.ops, index);
        if (!input_value.first || index + 2 > entry.ops.size())
            return false;
        auto *type = get_type(entry.ops[index++]);
        if (!type)
            return false;
        auto op = Instruction::CastOps(translate_castop(CastOp(entry.ops[index++])));
        auto *value = context->construct<CastInst>(type, input_value.first, op);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_SELECT:
    case FunctionRecord::INST_VSELECT:
    {
        unsigned index = 0;
        auto true_value = get_value_and_type(entry.ops, index);
        if (!true_value.first || index + 2 > entry.ops.size())
            return false;
        auto *false_value = get_value(entry.ops[index++], true_value.second);
        auto *cond_value = get_value(entry.ops[index++], Type::getInt1Ty(*context));
        if (!false_value || !cond_value)
            return false;
        auto *value = context->construct<SelectInst>(true_value.first, false_value, cond_value);
        if (!add_instruction(value))
            return false;
        break;
    }
    case FunctionRecord::INST_EXTRACTVAL:
    {
        unsigned index = 0;
        auto aggregate = get_value_and_type(entry.ops, index);
        if (!aggregate.first)
            return false;
        if (index == entry.ops.size())
            return false;

        Vector<unsigned> indices;
        indices.reserve(entry.ops.size() - index);
        unsigned num_args = entry.ops.size();
        Type *type = aggregate.second;

        for (; index < num_args; index++)
        {
            auto element = unsigned(entry.ops[index]);
            if (type->getTypeID() == Type::TypeID::StructTyID)
            {
                if (element >= cast<StructType>(type)->getNumElements())
                {
                    LOGE("Struct element index out of range.\n");
                    return false;
                }
                type = cast<StructType>(type)->getElementType(element);
            }
            else if (type->getTypeID() == Type::TypeID::ArrayTyID)
            {
                type = type->getArrayElementType();
            }
            else if (type->getTypeID() == Type::TypeID::VectorTyID)
            {
                type = cast<VectorType>(type)->getElementType();
            }
            else
            {
                // DXIL does not support vectors, so we're not supposed to index into them any further.
                return false;
            }
            indices.push_back(element);
        }

        auto *value = context->construct<ExtractValueInst>(type, aggregate.first, std::move(indices));
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_BR:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *true_block = get_basic_block(entry.ops[0]);
        if (!true_block)
            return false;
        if (!add_successor(true_block))
            return false;

        if (entry.ops.size() == 1)
        {
            auto *value = context->construct<BranchInst>(true_block);
            if (!add_instruction(value))
                return false;
        }
        else if (entry.ops.size() == 3)
        {
            auto *false_block = get_basic_block(entry.ops[1]);
            if (!false_block)
                return false;
            if (!add_successor(false_block))
                return false;
            auto *cond = get_value(entry.ops[2], Type::getInt1Ty(*context));
            auto *value = context->construct<BranchInst>(true_block, false_block, cond);
            if (!add_instruction(value))
                return false;
        }
        else
            return false;
        break;
    }

    case FunctionRecord::INST_SWITCH:
    {
        if (entry.ops.size() < 3)
            return false;
        auto *type = get_type(entry.ops[0]);
        auto *cond = get_value(entry.ops[1]);
        auto *default_block = get_basic_block(entry.ops[2]);
        if (!type || !cond || !default_block)
            return false;
        if (!add_successor(default_block))
            return false;

        unsigned num_cases = (entry.ops.size() - 3) / 2;
        auto *inst = context->construct<SwitchInst>(cond, default_block, num_cases);

        for (unsigned i = 0; i < num_cases; i++)
        {
            // For some reason, case values are encoded in absolute terms.
            auto *case_value = get_value(entry.ops[3 + 2 * i], type, true);
            BasicBlock *bb = get_basic_block(entry.ops[4 + 2 * i]);
            if (!case_value || !bb)
            {
                LOGE("Invalid switch record.\n");
                return false;
            }
            if (!add_successor(bb))
                return false;
            inst->addCase(case_value, bb);
        }
        if (!add_instruction(inst))
            return false;
        break;
    }

    case FunctionRecord::INST_ALLOCA:
    {
        if (entry.ops.size() < 3)
            return false;
        auto *allocated_type = get_type(entry.ops[0]);
        auto *type = get_type(entry.ops[1]);
        auto *size = get_value(entry.ops[2], nullptr, true);
        if (!allocated_type || !type || !size)
            return false;
        auto *ptr_type = PointerType::get(allocated_type, 0);
        auto *value = context->construct<AllocaInst>(ptr_type, type, size);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_GEP:
    {
        if (entry.ops.size() < 3)
            return false;
        bool inbounds = entry.ops[0] != 0;
        auto *type = get_type(entry.ops[1]);

        unsigned count = entry.ops.size();
        Vector<Value *> args;
        args.reserve(count);
        for (unsigned i = 2; i < count;)
        {
            auto value = get_value_and_type(entry.ops, i);
            if (!value.first)
                return false;
            args.push_back(value.first);
        }

        type = resolve_gep_element_type(type, args);
        if (!type)
            return false;
        type = PointerType::get(type, cast<PointerType>(args[0]->getType())->getAddressSpace());

        auto *value = context->construct<GetElementPtrInst>(type, std::move(args), inbounds);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_LOAD:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        if (index + 2 != entry.ops.size() && index + 3 != entry.ops.size())
            return false;
        if (!ptr.first || !isa<PointerType>(ptr.second))
        {
            LOGE("Loading from something that is not a pointer.\n");
            return false;
        }

        Type *loaded_type = nullptr;
        if (index + 3 == entry.ops.size())
            loaded_type = get_type(entry.ops[index++]);
        else
            loaded_type = cast<PointerType>(ptr.second)->getElementType();

        auto *value = context->construct<LoadInst>(loaded_type, ptr.first);
        if (!add_instruction(value))
            return false;
        break;
    }
case FunctionRecord::INST_LOAD:
{
	unsigned index = 0;
	auto ptr = get_value_and_type(entry.ops, index);
	if (index + 2 != entry.ops.size() && index + 3 != entry.ops.size())
		return false;
	if (!ptr.first || !isa<PointerType>(ptr.second))
	{
		LOGE("Loading from something that is not a pointer.\n");
		return false;
	}

	Type *loaded_type = nullptr;
	if (index + 3 == entry.ops.size())
		loaded_type = get_type(entry.ops[index++]);
	else
		loaded_type = cast<PointerType>(ptr.second)->getElementType();

	auto *value = context->construct<LoadInst>(loaded_type, ptr.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_STORE:
{
	unsigned index = 0;
	auto ptr = get_value_and_type(entry.ops, index);
	auto val = get_value_and_type(entry.ops, index);
	if (!ptr.first || !val.first || index + 2 != entry.ops.size())
		return false;
	auto *value = context->construct<StoreInst>(ptr.first, val.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_SHUFFLEVEC:
{
	unsigned index = 0;
	auto a = get_value_and_type(entry.ops, index);
	auto *b = get_value(entry.ops, index, a.second);
	auto shuf = get_value_and_type(entry.ops, index);
	if (!a.first || !b || !shuf.first || !isa<VectorType>(a.second))
		return false;
	auto *vec_type = VectorType::get(cast<VectorType>(shuf.second)->getNumElements(),
	                                 cast<VectorType>(a.second)->getElementType());
	auto *value = context->construct<ShuffleVectorInst>(vec_type, a.first, b, shuf.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_EXTRACTELT:
{
	unsigned index = 0;
	auto vec = get_value_and_type(entry.ops, index);
	if (!vec.first || !isa<VectorType>(vec.second))
		return false;
	auto element_index = get_value_and_type(entry.ops, index);
	if (!element_index.first)
		return false;
	auto *value = context->construct<ExtractElementInst>(vec.first, element_index.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_INSERTELT:
{
	unsigned index = 0;
	auto vec = get_value_and_type(entry.ops, index);
	if (!vec.first || !isa<VectorType>(vec.second))
		return false;
	auto *value = get_value(entry.ops, index, cast<VectorType>(vec.second)->getElementType());
	auto element_index = get_value_and_type(entry.ops, index);
	if (!value || !element_index.first)
		return false;
	auto *new_value = context->construct<InsertElementInst>(vec.first, value, element_index.first);
	if (!add_instruction(new_value))
		return false;
	break;
}

default:
	LOGE("Unhandled instruction!\n");
	return false;
}

return true;
}

bool ModuleParseContext::resolve_forward_references()
{
	for (auto *ref : pending_forward_references)
		if (!ref->resolve())
			return false;
	pending_forward_references.clear();

	for (auto *bb : basic_blocks)
		for (auto &inst : *bb)
			if (!inst.resolve_proxy_values())
				return false;

	return true;
}

bool ModuleParseContext::resolve_global_initializations()
{
	for (auto &ref : global_initializations)
	{
		Value *value = get_value(ref.second, nullptr, true);
		if (!value)
			return false;
		auto *constant_value = dyn_cast<Constant>(value);
		if (!constant_value)
		{
			LOGE("Global initializer is not a constant!\n");
			return false;
		}
		ref.first->set_initializer(constant_value);
	}
	global_initializations.clear();
	return true;
}
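// Forward references exist because instruction operands may refer to values
// that are only defined later in the function. They are recorded during
// parsing and patched here, once the whole body has been read.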
bool ModuleParseContext::parse_function_body(const BlockOrRecord &entry)
{
	auto global_values = values;

	// I think we are supposed to process functions in the same order as the module declared them?
	if (!seen_first_function_body)
	{
		std::reverse(functions_with_bodies.begin(), functions_with_bodies.end());
		seen_first_function_body = true;
	}

	if (functions_with_bodies.empty())
	{
		LOGE("No more functions to process?\n");
		return false;
	}

	function = functions_with_bodies.back();
	functions_with_bodies.pop_back();

	auto *func_type = function->getFunctionType();
	for (unsigned i = 0; i < func_type->getNumParams(); i++)
	{
		auto *param_type = func_type->getParamType(i);
		auto *arg = context->construct<Argument>(param_type, i);
		function->add_argument(arg);
		add_value(arg);
	}

	for (auto &child : entry.children)
	{
		if (child.IsBlock())
		{
			if (!parse_function_child_block(child))
				return false;
		}
		else
		{
			if (!parse_record(child))
				return false;
		}
	}

	if (!resolve_forward_references())
		return false;
	if (!resolve_global_initializations())
		return false;

	function->set_basic_blocks(std::move(basic_blocks));
	basic_blocks = {};
	basic_block_index = 0;
	module->add_function_implementation(function);

	// Restore the module-level value context.
	values = global_values;
	instructions.clear();
	return true;
}

bool ModuleParseContext::parse_type(const BlockOrRecord &child)
{
	Type *type = nullptr;
	switch (TypeRecord(child.id))
	{
	case TypeRecord::NUMENTRY:
	case TypeRecord::STRUCT_NAME:
		return true;

	case TypeRecord::VOID_TYPE:
		type = Type::getVoidTy(*context);
		break;
	case TypeRecord::HALF:
		type = Type::getHalfTy(*context);
		break;
	case TypeRecord::FLOAT:
		type = Type::getFloatTy(*context);
		break;
	case TypeRecord::DOUBLE:
		type = Type::getDoubleTy(*context);
		break;

	case TypeRecord::POINTER:
	{
		if (child.ops.size() < 2)
			return false;
		auto *pointee_type = get_type(child.ops[0]);
		if (!pointee_type)
			return false;
		type = PointerType::get(pointee_type, child.ops[1]);
		break;
	}

	case TypeRecord::ARRAY:
	{
		if (child.ops.size() < 2)
			return false;
		auto *elem_type = get_type(child.ops[1]);
		if (!elem_type)
			return false;
		type = ArrayType::get(elem_type, child.ops[0]);
		break;
	}

	case TypeRecord::INTEGER:
	{
		if (child.ops.size() < 1)
			return false;
		auto bit_width = child.ops[0];
		if (bit_width <= 64)
			type = Type::getIntTy(*context, unsigned(bit_width));
		else
			return false;
		break;
	}

	case TypeRecord::STRUCT_NAMED:
	case TypeRecord::STRUCT_ANON:
	{
		if (child.ops.size() < 1)
			return false;
		Vector<Type *> members;
		unsigned num_members = unsigned(child.ops.size()) - 1;
		members.reserve(num_members);
		for (unsigned i = 0; i < num_members; i++)
			members.push_back(get_type(child.ops[i + 1]));
		type = StructType::get(*context, std::move(members));
		break;
	}

	case TypeRecord::VECTOR:
	{
		if (child.ops.size() < 2)
			return false;
		auto *elem_type = get_type(child.ops[1]);
		if (!elem_type)
			return false;
		type = VectorType::get(child.ops[0], elem_type);
		break;
	}

	case TypeRecord::FUNCTION:
	{
		if (child.ops.size() < 2)
			return false;
		Vector<Type *> argument_types;
		argument_types.reserve(child.ops.size() - 2);
		for (size_t i = 2; i < child.ops.size(); i++)
			argument_types.push_back(get_type(child.ops[i]));
		auto *return_type = get_type(child.ops[1]);
		if (!return_type)
			return false;
		type = context->construct<FunctionType>(*context, return_type, std::move(argument_types));
		break;
	}

	case TypeRecord::LABEL:
		type = Type::getLabelTy(*context);
		break;
	case TypeRecord::METADATA:
		type = Type::getMetadataTy(*context);
		break;
	case TypeRecord::OPAQUE_TYPE:
		type = Type::getOpaqueTy(*context);
		break;

	default:
		LOGE("Unknown type!\n");
		return false;
	}

	add_type(type);
	return true;
}

bool ModuleParseContext::parse_types(const BlockOrRecord &entry)
{
	for (auto &child : entry.children)
		if (!parse_type(child))
			return false;
	return true;
}
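// Types are appended to the type table in record order; get_type() later
// resolves plain indices into this table, so that ordering must be preserved.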
bool ModuleParseContext::parse_value_symtab(const BlockOrRecord &entry)
{
	for (auto &symtab : entry.children)
	{
		switch (ValueSymtabRecord(symtab.id))
		{
		case ValueSymtabRecord::ENTRY:
		{
			if (symtab.ops.size() < 1)
				return false;
			auto name = symtab.getString(1);
			module->add_value_name(symtab.ops[0], name);
			break;
		}

		default:
			break;
		}
	}
	return true;
}

static GlobalVariable::LinkageTypes decode_linkage(uint64_t v)
{
	switch (v)
	{
	case 0:
	case 5:
	case 6:
	case 15:
		return GlobalVariable::ExternalLinkage;
	case 2:
		return GlobalVariable::AppendingLinkage;
	default:
		return GlobalVariable::InternalLinkage;
	}
}

bool ModuleParseContext::parse_global_variable_record(const BlockOrRecord &entry)
{
	if (use_strtab)
	{
		LOGE("Unknown module code 2 which uses strtab.\n");
		return false;
	}

	if (entry.ops.size() < 4)
		return false;

	auto *type = get_type(entry.ops[0]);
	bool is_const = (entry.ops[1] & 1) != 0;
	bool explicit_type = (entry.ops[1] & 2) != 0;
	unsigned address_space = 0;
	if (explicit_type)
	{
		address_space = entry.ops[1] >> 2;
	}
	else
	{
		address_space = cast<PointerType>(type)->getAddressSpace();
		type = cast<PointerType>(type)->getElementType();
	}

	if (!type)
		return false;

	auto linkage = decode_linkage(entry.ops[3]);
	auto *value = context->construct<GlobalVariable>(PointerType::get(type, address_space), linkage, is_const);
	module->add_global_variable(value);
	add_value(value);

	uint64_t init_id = entry.ops[2];
	if (init_id != 0)
		global_initializations.push_back({ value, init_id - 1 });

	return true;
}

bool ModuleParseContext::parse_function_record(const BlockOrRecord &entry)
{
	if (use_strtab)
	{
		LOGE("Unknown module code 2 which uses strtab.\n");
		return false;
	}

	if (entry.ops.size() < 3)
		return false;

	auto *type = get_type(entry.ops[0]);
	if (!type)
		return false;

	// Calling convention is [1], not relevant.
	bool is_proto = entry.ops[2];
	// Lots of other irrelevant arguments ...

	auto *func_type = dyn_cast<FunctionType>(type);
	if (!func_type)
		func_type = cast<FunctionType>(cast<PointerType>(type)->getElementType());
	if (!func_type)
		return false;

	auto id = values.size();
	auto *func = context->construct<Function>(func_type, id, *module);

	if (entry.ops.size() >= 5 && entry.ops[4] != 0 && (entry.ops[4] - 1) < attribute_lists.size())
		func->set_attributes(attribute_lists[entry.ops[4] - 1]);

	values.push_back(func);
	if (!is_proto)
		functions_with_bodies.push_back(func);

	return true;
}

bool ModuleParseContext::parse_version_record(const BlockOrRecord &entry)
{
	if (entry.ops.size() < 1)
		return false;
	unsigned version = entry.ops[0];
	use_relative_id = version >= 1;
	use_strtab = version >= 2;
	return true;
}
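// Bitcode version 1 switched operand references from absolute to relative
// value IDs, and version 2 moved symbol names into a string table; both flags
// change how later records must be decoded.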
Type *ModuleParseContext::get_type(uint64_t index)
{
	if (index >= types.size())
		return nullptr;
	return types[index];
}

bool ModuleParseContext::add_type(Type *type)
{
	types.push_back(type);
	return true;
}

void Module::add_value_name(uint64_t id, const String &name)
{
	value_symtab[id] = name;
}

void Module::add_function_implementation(Function *func)
{
	functions.push_back(func);
}

void Module::add_global_variable(GlobalVariable *variable)
{
	globals.push_back(variable);
}

void Module::add_named_metadata(const String &name, NamedMDNode *node)
{
	named_metadata[name] = node;
}

void Module::add_unnamed_metadata(MDNode *node)
{
	unnamed_metadata.push_back(node);
}

Function *Module::getFunction(const String &name) const
{
	auto itr = std::find_if(functions.begin(), functions.end(),
	                        [&](const Function *func) { return func->getName() == name; });
	if (itr != functions.end())
		return *itr;
	else
		return nullptr;
}

NamedMDNode *Module::getNamedMetadata(const String &name) const
{
	auto itr = named_metadata.find(name);
	if (itr != named_metadata.end())
		return itr->second;
	else
		return nullptr;
}

static const String empty_string;
const String &Module::get_value_name(uint64_t id) const
{
	auto itr = value_symtab.find(id);
	if (itr != value_symtab.end())
		return itr->second;
	else
		return empty_string;
}

LLVMContext &Module::getContext()
{
	return context;
}

Module::Module(LLVMContext &context_)
    : context(context_)
{
}

Vector<Function *>::const_iterator Module::begin() const
{
	return functions.begin();
}

Vector<Function *>::const_iterator Module::end() const
{
	return functions.end();
}

IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> Module::global_begin() const
{
	return globals.begin();
}

IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> Module::global_end() const
{
	return globals.end();
}

UnorderedMap<String, NamedMDNode *>::const_iterator Module::named_metadata_begin() const
{
	return named_metadata.begin();
}

UnorderedMap<String, NamedMDNode *>::const_iterator Module::named_metadata_end() const
{
	return named_metadata.end();
}

Vector<MDNode *>::const_iterator Module::unnamed_metadata_begin() const
{
	return unnamed_metadata.begin();
}

Vector<MDNode *>::const_iterator Module::unnamed_metadata_end() const
{
	return unnamed_metadata.end();
}

Module *parseIR(LLVMContext &context, const void *data, size_t size)
{
	LLVMBC::BitcodeReader reader(static_cast<const uint8_t *>(data), size);
	auto toplevel = reader.ReadToplevelBlock();

	// The top-level block must be MODULE_BLOCK.
	if (KnownBlocks(toplevel.id) != KnownBlocks::MODULE_BLOCK)
		return nullptr;

	// We should have consumed all bits, only one top-level block.
	if (!reader.AtEndOfStream())
		return nullptr;

	auto *module = context.construct<Module>(context);
	ModuleParseContext parse_context;
	parse_context.module = module;
	parse_context.context = &module->getContext();

	for (auto &child : toplevel.children)
	{
		if (child.IsBlock())
		{
			switch (KnownBlocks(child.id))
			{
			case KnownBlocks::VALUE_SYMTAB_BLOCK:
				if (!parse_context.parse_value_symtab(child))
					return nullptr;
				break;

			case KnownBlocks::FUNCTION_BLOCK:
				if (!parse_context.parse_function_body(child))
					return nullptr;
				break;

			case KnownBlocks::TYPE_BLOCK:
				if (!parse_context.parse_types(child))
					return nullptr;
				break;

			case KnownBlocks::CONSTANTS_BLOCK:
				if (!parse_context.parse_constants_block(child))
					return nullptr;
				break;

			case KnownBlocks::METADATA_BLOCK:
				if (!parse_context.parse_metadata_block(child))
					return nullptr;
				break;

			case KnownBlocks::PARAMATTR_BLOCK:
				if (!parse_context.parse_paramattr_block(child))
					return nullptr;
				break;

			case KnownBlocks::PARAMATTR_GROUP_BLOCK:
				if (!parse_context.parse_paramattr_group_block(child))
					return nullptr;
				break;

			default:
				break;
			}
		}
		else
		{
			switch (ModuleRecord(child.id))
			{
			case ModuleRecord::VERSION:
				if (!parse_context.parse_version_record(child))
					return nullptr;
				break;

			case ModuleRecord::FUNCTION:
				if (!parse_context.parse_function_record(child))
					return nullptr;
				break;

			case ModuleRecord::GLOBAL_VARIABLE:
				if (!parse_context.parse_global_variable_record(child))
					return nullptr;
				break;

			default:
				break;
			}
		}
	}

	return module;
}
} // namespace LLVMBC

================================================
FILE: bc/module.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

namespace dxbc_spv
{
namespace ir
{
class Builder;
}
}

#include "data_structures.hpp"
#include "iterator.hpp"
#include <stddef.h>
#include <stdint.h>

// A reasonably small LLVM C++ API lookalike.
#define llvm LLVMBC

namespace LLVMBC
{
class Function;
class LLVMContext;
class Type;
class Instruction;
class BasicBlock;
class GlobalVariable;
class NamedMDNode;
class MDNode;

class Module
{
public:
	explicit Module(LLVMContext &context);
	LLVMContext &getContext();

	NamedMDNode *getNamedMetadata(const String &name) const;
	Function *getFunction(const String &name) const;

	void add_value_name(uint64_t id, const String &name);
	void add_function_implementation(Function *func);
	void add_global_variable(GlobalVariable *variable);
	void add_named_metadata(const String &name, NamedMDNode *node);
	void add_unnamed_metadata(MDNode *node);
	const String &get_value_name(uint64_t id) const;

	Vector<Function *>::const_iterator begin() const;
	Vector<Function *>::const_iterator end() const;
	IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> global_begin() const;
	IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> global_end() const;
	UnorderedMap<String, NamedMDNode *>::const_iterator named_metadata_begin() const;
	UnorderedMap<String, NamedMDNode *>::const_iterator named_metadata_end() const;
	Vector<MDNode *>::const_iterator unnamed_metadata_begin() const;
	Vector<MDNode *>::const_iterator unnamed_metadata_end() const;

private:
	LLVMContext &context;
	Vector<Function *> functions;
	Vector<GlobalVariable *> globals;
	UnorderedMap<uint64_t, String> value_symtab;
	UnorderedMap<String, NamedMDNode *> named_metadata;
	Vector<MDNode *> unnamed_metadata;
};

Module *parseIR(LLVMContext &context, const void *data, size_t size);
Module *parseDXBCIR(LLVMContext &context, dxbc_spv::ir::Builder &builder);
Module *parseDXBCBinary(LLVMContext &context, const void *data, size_t size);
bool disassemble(Module &module, String &str);
} // namespace LLVMBC
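// Illustrative usage of the lookalike API (not part of the header), assuming
// `data`/`size` point at a raw LLVM bitcode payload and do_something_with()
// is a placeholder:
//
//   LLVMBC::LLVMContext context;
//   LLVMBC::Module *module = LLVMBC::parseIR(context, data, size);
//   if (module) // parseIR returns nullptr on malformed input.
//       for (const LLVMBC::Function *func : *module)
//           do_something_with(func->getName());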
================================================
FILE: bc/module_dxbc_ir.cpp
================================================
/* Copyright (c) 2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "module.hpp"
#include "context.hpp"
#include "metadata.hpp"
#include "cast.hpp"
#include "function.hpp"
#include "instruction.hpp"
#include <array>
#include "../dxil.hpp"
#include <string.h>

// dxbc-spirv
#include "ir/ir.h"
#include "ir/ir_builder.h"
#include "dxbc/dxbc_api.h"
#include "util/util_log.h"

using namespace dxbc_spv;

class ScopedLogger : util::Logger
{
public:
	virtual void message(util::LogLevel severity, const char *text)
	{
		switch (severity)
		{
		case util::LogLevel::eDebug:
		case util::LogLevel::eInfo:
			LOGI("%s\n", text);
			break;
		case util::LogLevel::eWarn:
			LOGW("%s\n", text);
			break;
		case util::LogLevel::eError:
			LOGE("%s\n", text);
			break;
		}
	}

	virtual util::LogLevel getMinimumSeverity()
	{
		return util::LogLevel::eInfo;
	}
};

namespace LLVMBC
{
template <typename Func>
static void for_all_opcodes(ir::Builder &builder, ir::OpCode opcode, const Func &func)
{
	for (auto &op : builder)
		if (op.getOpCode() == opcode)
			if (!func(op))
				return;
}

static const char *shader_stage_to_meta(ir::ShaderStage stage)
{
	switch (stage)
	{
	case ir::ShaderStage::eVertex: return "vs";
	case ir::ShaderStage::eHull: return "hs";
	case ir::ShaderStage::eDomain: return "ds";
	case ir::ShaderStage::eGeometry: return "gs";
	case ir::ShaderStage::ePixel: return "ps";
	case ir::ShaderStage::eCompute: return "cs";
	default: return "";
	}
}

static DXIL::InputPrimitive convert_input_primitive_type(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eLines: return DXIL::InputPrimitive::Line;
	case ir::PrimitiveType::eLinesAdj: return DXIL::InputPrimitive::LineWithAdjacency;
	case ir::PrimitiveType::ePoints: return DXIL::InputPrimitive::Point;
	case ir::PrimitiveType::eTriangles: return DXIL::InputPrimitive::Triangle;
	case ir::PrimitiveType::eTrianglesAdj: return DXIL::InputPrimitive::TriangleWithAdjaceny;
	default: return DXIL::InputPrimitive::Undefined;
	}
}

static DXIL::PrimitiveTopology convert_output_primitive_type(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eLines: return DXIL::PrimitiveTopology::LineStrip;
	case ir::PrimitiveType::ePoints: return DXIL::PrimitiveTopology::PointList;
	case ir::PrimitiveType::eTriangles: return DXIL::PrimitiveTopology::TriangleStrip;
	default: return DXIL::PrimitiveTopology::Undefined;
	}
}

static DXIL::ResourceKind convert_resource_kind(ir::ResourceKind kind)
{
	switch (kind)
	{
	case ir::ResourceKind::eBufferRaw: return DXIL::ResourceKind::RawBuffer;
	case ir::ResourceKind::eBufferStructured: return DXIL::ResourceKind::StructuredBuffer;
	case ir::ResourceKind::eBufferTyped: return DXIL::ResourceKind::TypedBuffer;
	case ir::ResourceKind::eImage1D: return DXIL::ResourceKind::Texture1D;
	case ir::ResourceKind::eImage1DArray: return DXIL::ResourceKind::Texture1DArray;
	case ir::ResourceKind::eImage2D: return DXIL::ResourceKind::Texture2D;
	case ir::ResourceKind::eImage2DArray: return DXIL::ResourceKind::Texture2DArray;
	case ir::ResourceKind::eImage3D: return DXIL::ResourceKind::Texture3D;
	case ir::ResourceKind::eImageCube: return DXIL::ResourceKind::TextureCube;
	case ir::ResourceKind::eImageCubeArray: return DXIL::ResourceKind::TextureCubeArray;
	case ir::ResourceKind::eImage2DMS: return DXIL::ResourceKind::Texture2DMS;
	case ir::ResourceKind::eImage2DMSArray: return DXIL::ResourceKind::Texture2DMSArray;
	default:
		LOGE("Unrecognized resource kind %d\n", int(kind));
		return DXIL::ResourceKind::Invalid;
	}
}
kind %d\n", int(kind)); return DXIL::ResourceKind::Invalid; } } static DXIL::Op convert_builtin_opcode(ir::BuiltIn builtin) { switch (builtin) { case ir::BuiltIn::eSampleCount: return DXIL::Op::RenderTargetGetSampleCount; case ir::BuiltIn::eLocalThreadIndex: return DXIL::Op::FlattenedThreadIdInGroup; case ir::BuiltIn::eIsFullyCovered: return DXIL::Op::InnerCoverage; case ir::BuiltIn::eGsInstanceId: return DXIL::Op::GSInstanceID; case ir::BuiltIn::ePrimitiveId: return DXIL::Op::PrimitiveID; case ir::BuiltIn::eTessControlPointId: return DXIL::Op::OutputControlPointID; case ir::BuiltIn::eTessControlPointCountIn: return DXIL::Op::ExtendedSpirvControlPointCountIn; default: return DXIL::Op::Count; } } static DXIL::Semantic convert_semantic(ir::BuiltIn builtin) { switch (builtin) { case ir::BuiltIn::ePosition: return DXIL::Semantic::Position; case ir::BuiltIn::eClipDistance: return DXIL::Semantic::ClipDistance; case ir::BuiltIn::eCullDistance: return DXIL::Semantic::CullDistance; case ir::BuiltIn::eVertexId: return DXIL::Semantic::VertexID; case ir::BuiltIn::eInstanceId: return DXIL::Semantic::InstanceID; case ir::BuiltIn::ePrimitiveId: return DXIL::Semantic::PrimitiveID; case ir::BuiltIn::eLayerIndex: return DXIL::Semantic::RenderTargetArrayIndex; case ir::BuiltIn::eViewportIndex: return DXIL::Semantic::ViewPortArrayIndex; case ir::BuiltIn::eGsInstanceId: return DXIL::Semantic::GSInstanceID; case ir::BuiltIn::eTessControlPointId: return DXIL::Semantic::OutputControlPointID; case ir::BuiltIn::eTessCoord: return DXIL::Semantic::DomainLocation; case ir::BuiltIn::eTessFactorInner: return DXIL::Semantic::InsideTessFactor; case ir::BuiltIn::eTessFactorOuter: return DXIL::Semantic::TessFactor; case ir::BuiltIn::eSampleId: return DXIL::Semantic::SampleIndex; case ir::BuiltIn::eSampleMask: return DXIL::Semantic::Coverage; case ir::BuiltIn::eIsFrontFace: return DXIL::Semantic::IsFrontFace; case ir::BuiltIn::eDepth: return DXIL::Semantic::Depth; case ir::BuiltIn::eStencilRef: return DXIL::Semantic::StencilRef; case ir::BuiltIn::eGlobalThreadId: return DXIL::Semantic::DispatchThreadID; case ir::BuiltIn::eLocalThreadId: return DXIL::Semantic::GroupThreadID; case ir::BuiltIn::eWorkgroupId: return DXIL::Semantic::GroupID; default: return DXIL::Semantic::User; } } static DXIL::InterpolationMode convert_interpolation_mode(ir::InterpolationMode mode) { switch (mode) { case ir::InterpolationMode::eCentroid: return DXIL::InterpolationMode::LinearCentroid; case ir::InterpolationMode::eNoPerspective: return DXIL::InterpolationMode::LinearNoperspective; case ir::InterpolationMode::eFlat: return DXIL::InterpolationMode::Constant; case ir::InterpolationMode::eSample: return DXIL::InterpolationMode::LinearSample; default: return DXIL::InterpolationMode::Undefined; } } static DXIL::AtomicBinOp convert_atomic_binop(ir::AtomicOp binop) { switch (binop) { case ir::AtomicOp::eAdd: case ir::AtomicOp::eInc: return DXIL::AtomicBinOp::IAdd; case ir::AtomicOp::eAnd: return DXIL::AtomicBinOp::And; case ir::AtomicOp::eOr: return DXIL::AtomicBinOp::Or; case ir::AtomicOp::eXor: return DXIL::AtomicBinOp::Xor; case ir::AtomicOp::eExchange: return DXIL::AtomicBinOp::Exchange; case ir::AtomicOp::eSMax: return DXIL::AtomicBinOp::IMax; case ir::AtomicOp::eSMin: return DXIL::AtomicBinOp::IMin; case ir::AtomicOp::eUMax: return DXIL::AtomicBinOp::UMax; case ir::AtomicOp::eUMin: return DXIL::AtomicBinOp::UMin; case ir::AtomicOp::eSub: case ir::AtomicOp::eDec: return DXIL::AtomicBinOp::Sub; case ir::AtomicOp::eLoad: return 
static AtomicRMWInst::BinOp convert_atomic_binop_llvm(ir::AtomicOp binop)
{
	switch (binop)
	{
	case ir::AtomicOp::eAdd:
	case ir::AtomicOp::eInc: return AtomicRMWInst::BinOp::Add;
	case ir::AtomicOp::eAnd: return AtomicRMWInst::BinOp::And;
	case ir::AtomicOp::eOr: return AtomicRMWInst::BinOp::Or;
	case ir::AtomicOp::eXor: return AtomicRMWInst::BinOp::Xor;
	case ir::AtomicOp::eExchange: return AtomicRMWInst::BinOp::Xchg;
	case ir::AtomicOp::eSMax: return AtomicRMWInst::BinOp::Max;
	case ir::AtomicOp::eSMin: return AtomicRMWInst::BinOp::Min;
	case ir::AtomicOp::eUMax: return AtomicRMWInst::BinOp::UMax;
	case ir::AtomicOp::eUMin: return AtomicRMWInst::BinOp::UMin;
	case ir::AtomicOp::eSub:
	case ir::AtomicOp::eDec: return AtomicRMWInst::BinOp::Sub;
	case ir::AtomicOp::eLoad: return AtomicRMWInst::BinOp::Or;
	case ir::AtomicOp::eStore: return AtomicRMWInst::BinOp::Xchg;
	default: return AtomicRMWInst::BinOp::Invalid;
	}
}

static DXIL::Op convert_round_mode(ir::RoundMode mode)
{
	switch (mode)
	{
	case ir::RoundMode::ePositiveInf: return DXIL::Op::Round_pi;
	case ir::RoundMode::eNegativeInf: return DXIL::Op::Round_ni;
	case ir::RoundMode::eZero: return DXIL::Op::Round_z;
	default: return DXIL::Op::Round_ne;
	}
}

struct ComponentMapping
{
	DXIL::ComponentType type = DXIL::ComponentType::Invalid;
	uint32_t num_rows = 1;
	uint32_t num_cols = 1;
};

static ComponentMapping convert_component_mapping(const ir::Type &type, bool need_axis)
{
	ComponentMapping mapping = {};

	switch (type.getBaseType(0).getBaseType())
	{
	case ir::ScalarType::eF16: mapping.type = DXIL::ComponentType::F16; break;
	case ir::ScalarType::eI16: mapping.type = DXIL::ComponentType::I16; break;
	case ir::ScalarType::eU16: mapping.type = DXIL::ComponentType::U16; break;
	case ir::ScalarType::eF32: mapping.type = DXIL::ComponentType::F32; break;
	case ir::ScalarType::eI32: mapping.type = DXIL::ComponentType::I32; break;
	case ir::ScalarType::eU32: mapping.type = DXIL::ComponentType::U32; break;
	case ir::ScalarType::eF64: mapping.type = DXIL::ComponentType::F64; break;
	case ir::ScalarType::eI64: mapping.type = DXIL::ComponentType::I64; break;
	case ir::ScalarType::eU64: mapping.type = DXIL::ComponentType::U64; break;
	case ir::ScalarType::eBool: mapping.type = DXIL::ComponentType::I1; break;
	default:
		LOGE("Unrecognized component type.\n");
		break;
	}

	if (need_axis)
	{
		// Strip the outermost dimension.
		if (type.getArrayDimensions() >= 2)
			mapping.num_rows = type.getArraySize(type.getArrayDimensions() - 2);
	}
	else if (type.isArrayType())
	{
		mapping.num_rows = type.getArraySize(0);
		if (type.getArrayDimensions() != 1)
			LOGE("Unexpected number of array dimensions.\n");
	}

	mapping.num_cols = type.getBaseType(0).getVectorSize();
	return mapping;
}
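// Example of the mapping above: an ir::Type of f32x4[3] (one array dimension)
// yields num_rows = 3, num_cols = 4 with component type F32. With need_axis
// set, the outermost array dimension is the control-point axis and is
// stripped rather than counted as rows.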
static DXIL::TessellatorDomain convert_hull_domain(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eTriangles: return DXIL::TessellatorDomain::Tri;
	case ir::PrimitiveType::eQuads: return DXIL::TessellatorDomain::Quad;
	case ir::PrimitiveType::eLines: return DXIL::TessellatorDomain::IsoLine;
	default: return DXIL::TessellatorDomain::Undefined;
	}
}

static DXIL::TessellatorPartitioning convert_hull_partitioning(ir::TessPartitioning part)
{
	switch (part)
	{
	case ir::TessPartitioning::eInteger: return DXIL::TessellatorPartitioning::Integer;
	case ir::TessPartitioning::eFractEven: return DXIL::TessellatorPartitioning::FractionalEven;
	case ir::TessPartitioning::eFractOdd: return DXIL::TessellatorPartitioning::FractionalOdd;
	default: return DXIL::TessellatorPartitioning::Undefined;
	}
}

static DXIL::TessellatorOutputPrimitive convert_hull_output_primitive(ir::PrimitiveType type, ir::TessWindingOrder winding)
{
	switch (type)
	{
	case ir::PrimitiveType::eTriangles:
		return winding == ir::TessWindingOrder::eCw ?
		       DXIL::TessellatorOutputPrimitive::TriangleCW :
		       DXIL::TessellatorOutputPrimitive::TriangleCCW;
	case ir::PrimitiveType::eLines: return DXIL::TessellatorOutputPrimitive::Line;
	case ir::PrimitiveType::ePoints: return DXIL::TessellatorOutputPrimitive::Point;
	default: return DXIL::TessellatorOutputPrimitive::Undefined;
	}
}

struct DXILIntrinsicTable
{
	struct FunctionOverload
	{
		Function *func;
		// Either overloaded on return type, or the primary argument for e.g. stores.
		Type *overload_type;
	};

	struct FunctionEntry
	{
		// At most, this should be overloaded for i32/u32/f32 x 16/32/64.
		std::array<FunctionOverload, 9> overloads;
		unsigned num_overloads;
	};

	FunctionEntry intrinsic_functions[int(DXIL::Op::Count)] = {};

	Function *get(Module &module, DXIL::Op op, Type *return_type,
	              const Vector<Type *> &argument_types, Type *overload_type, uint64_t &tween);
};

Function *DXILIntrinsicTable::get(Module &module, DXIL::Op op, Type *return_type,
                                  const Vector<Type *> &argument_types, Type *overload_type, uint64_t &tween)
{
	auto &entry = intrinsic_functions[int(op)];
	for (unsigned i = 0; i < entry.num_overloads; i++)
		if (entry.overloads[i].overload_type == overload_type)
			return entry.overloads[i].func;

	auto &context = module.getContext();
	assert(entry.num_overloads < entry.overloads.size());
	auto *func_type = context.construct<FunctionType>(context, return_type, argument_types);
	auto *func = context.construct<Function>(func_type, ++tween, module);
	// TODO: Can have a look-up for expected intrinsic names.
	module.add_value_name(tween, "dx.op.intrinsic");
	entry.overloads[entry.num_overloads++] = { func, overload_type };
	return func;
}
class ParseContext
{
public:
	ParseContext(LLVMContext &context_, ir::Builder &builder_, Module &module_)
	    : context(context_), builder(builder_), module(module_)
	{
	}

	bool emit_metadata();
	bool emit_entry_point();
	bool emit_function_bodies();

private:
	LLVMContext &context;
	ir::Builder &builder;
	Module &module;
	uint64_t metadata_tween_id = 0;
	uint64_t tween_id = 0;
	ir::ShaderStage shader_stage = {};

	ConstantInt *get_constant_uint(uint32_t value);

	// Metadata wrangling
	ConstantAsMetadata *create_constant_uint_meta(uint32_t value);
	ConstantAsMetadata *create_constant_uint64_meta(uint32_t value);
	MDString *create_string_meta(const String &str);
	ConstantAsMetadata *create_constant_meta(Constant *c);

	template <typename... Ops>
	MDNode *create_md_node(Ops &&... ops)
	{
		Vector<MDOperand *> vops { std::forward<Ops>(ops)... };
		return create_md_node(std::move(vops));
	}

	void create_named_md_node(const String &name, MDNode *node);
	MDNode *create_md_node(Vector<MDOperand *> ops);
	MDOperand *create_entry_point_meta(llvm::Function *patch_control_func);
	MDNode *create_stage_io_meta();
	MDOperand *create_null_meta();
	void set_function_attributes(Function *func);

	struct MetadataMapping
	{
		Vector<MDNode *> nodes;
	};
	MetadataMapping srvs, uavs, cbvs, samplers, inputs, outputs, patches;

	uint32_t build_texture_srv(uint32_t space, uint32_t index, uint32_t size,
	                           DXIL::ResourceKind kind, DXIL::ComponentType type);
	uint32_t build_texture_uav(uint32_t space, uint32_t index, uint32_t size,
	                           DXIL::ResourceKind kind, DXIL::ComponentType type, bool coherent, bool rov);
	uint32_t build_buffer_uav(uint32_t space, uint32_t index, uint32_t size,
	                          DXIL::ResourceKind kind, uint32_t stride, bool coherent, bool counter, bool rov);
	uint32_t build_buffer_srv(uint32_t space, uint32_t index, uint32_t size,
	                          DXIL::ResourceKind kind, uint32_t stride);
	uint32_t build_cbv(uint32_t space, uint32_t index, uint32_t size, uint32_t cbv_size);
	uint32_t build_sampler(uint32_t space, uint32_t index, uint32_t size);
	uint32_t build_stage_io(MetadataMapping &mapping, ir::SsaDef ssa, const String &name,
	                        DXIL::ComponentType type, DXIL::Semantic semantic, uint32_t semantic_index,
	                        DXIL::InterpolationMode interpolation, uint32_t rows, uint32_t cols,
	                        uint32_t start_row, uint32_t start_col, uint32_t stream, bool need_axis);

	// DXIL intrinsic build.
	DXILIntrinsicTable dxil_intrinsics;
	template <typename... Values>
	Instruction *build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Values &&... values);
	Instruction *build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Vector<Value *> values);

	// Resource access hell.
	Instruction *build_load_input(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch);
	Instruction *build_load_output(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch);
	Instruction *build_store_output(uint32_t index, Value *row, uint32_t col, Value *value, bool patch);
	Instruction *build_load_builtin(DXIL::Op opcode, ir::SsaDef addr);
	Instruction *build_descriptor_load(ir::SsaDef resource, ir::SsaDef index, bool nonuniform);

	bool build_input_load(const ir::Op &op);
	bool build_output_load(const ir::Op &op);
	bool build_output_store(const ir::Op &op);
	bool build_gep_load(const ir::Op &op);
	bool build_gep_store(const ir::Op &op);
	bool build_composite_construct(const ir::Op &op);
	bool build_composite_extract(const ir::Op &op);
	bool build_composite_insert(const ir::Op &op);
	bool build_descriptor_load(const ir::Op &op);
	bool build_buffer_load(const ir::Op &op);
	bool build_buffer_load_cbv(const ir::Op &op);
	bool build_buffer_load(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_buffer_load_return_composite(const ir::Op &op, Value *value);
	Instruction *build_extract_composite(const ir::Op &op, Value *value, unsigned num_elements);
	bool build_buffer_query_size(const ir::Op &op);
	bool build_buffer_store(const ir::Op &op);
	bool build_buffer_store(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_buffer_atomic(const ir::Op &op);
	bool build_lds_atomic(const ir::Op &op);
	bool build_buffer_atomic_binop(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_counter_atomic(const ir::Op &op);
	bool build_image_load(const ir::Op &op);
	bool build_image_store(const ir::Op &op);
	bool build_image_atomic(const ir::Op &op);
	bool build_image_query_size(const ir::Op &op);
	bool build_image_query_mips_samples(const ir::Op &op);
	bool build_image_sample(const ir::Op &op);
	bool build_image_gather(const ir::Op &op);
	bool build_image_compute_lod(const ir::Op &op);
	bool build_deriv(const ir::Op &op);
	bool build_check_sparse_access(const ir::Op &op);
	bool build_fround(const ir::Op &op);
	bool build_frcp(const ir::Op &op);
	bool build_binary_op(const ir::Op &op, BinaryOperator::BinaryOps binop);
	bool build_interpolate_at_centroid(const ir::Op &op);
	bool build_interpolate_at_sample(const ir::Op &op);
	bool build_interpolate_at_offset(const ir::Op &op);
	bool build_barrier(const ir::Op &op);
	bool build_demote(const ir::Op &op);

	template <DXIL::Op dxop> bool build_dxil_unary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_constant_unary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_binary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_trinary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_quaternary(const ir::Op &op);

	Value *get_extracted_composite_component(Value *value, unsigned component);
	Value *get_constant_mul(Value *value, uint32_t scale);

	// BasicBlock emission.
	BasicBlock *current_bb = nullptr;
	void push_instruction(Instruction *instruction, ir::SsaDef ssa = {});
	bool push_instruction(const ir::Op &op);

	// ir::Builder helpers.
	// Need ways to translate between ir::SsaDef <-> LLVM values for the most part.
	UnorderedMap<ir::SsaDef, Function *> function_map;
	UnorderedMap<ir::SsaDef, BasicBlock *> bb_map;
	UnorderedMap<ir::SsaDef, Type *> param_types;
	Vector<Vector<Value *>> params;
	UnorderedMap<ir::SsaDef, Value *> value_map;

	// Maps stage IO and resources since we need to resolve them back to type + metadata index
	// when loading a descriptor.
	struct StageIOHandler
	{
		uint32_t index = UINT32_MAX;
		DXIL::Op op = DXIL::Op::Count;
		bool need_axis = false;
	};
	UnorderedMap<ir::SsaDef, StageIOHandler> stage_io_map;

	struct StageIOAccess
	{
		Value *axis;
		Value *row;
		uint32_t col;
	};
	StageIOAccess build_stage_io_access(const StageIOHandler &handler, ir::SsaDef io_decl, ir::SsaDef addr);

	struct ResourceHandler
	{
		DXIL::ResourceType resource_type;
		DXIL::ResourceKind resource_kind;
		uint32_t index;
		uint32_t binding_offset; // DXIL is weird.
	};
	UnorderedMap<ir::SsaDef, ResourceHandler> resource_map;

	Type *convert_type(const ir::Type &type);
	BasicBlock *get_basic_block(ir::SsaDef ssa);
	Value *get_value(const ir::Operand &op) const;
	Value *get_value(const ir::SsaDef &op) const;
	bool emit_constant(const ir::Op &op);

	ir::OpFlags global_fp_flags = {};
};

static inline Type *get_value_type(Value *value)
{
	assert(value);
	return value->getType();
}

static Type *get_scalar_type(Type *type)
{
	if (auto *vec = dyn_cast<VectorType>(type))
		return vec->getElementType();
	else if (isa<StructType>(type))
		return type->getStructElementType(0);
	else
		return type;
}

template <typename... Values>
Instruction *ParseContext::build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Values &&... values)
{
	auto *func = dxil_intrinsics.get(
	    module, op, return_type,
	    Vector<Type *> { Type::getInt32Ty(context), get_value_type(values)... },
	    overload_type, tween_id);
	auto *inst = context.construct<CallInst>(
	    func->getFunctionType(), func,
	    Vector<Value *> { get_constant_uint(uint32_t(op)), values... });
	return inst;
}

Instruction *ParseContext::build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Vector<Value *> values)
{
	Vector<Type *> types;
	types.reserve(values.size() + 1);
	types.push_back(Type::getInt32Ty(context));
	for (auto *v : values)
		types.push_back(v->getType());

	auto *func = dxil_intrinsics.get(module, op, return_type, types, overload_type, tween_id);
	values.insert(values.begin(), get_constant_uint(uint32_t(op)));
	auto *inst = context.construct<CallInst>(func->getFunctionType(), func, std::move(values));
	return inst;
}
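// Illustrative expansion of the helper above (assuming a float overload):
//   build_dxil_call(DXIL::Op::Sin, f32_type, f32_type, src)
// emits the equivalent of
//   %r = call float @dx.op.intrinsic(i32 <opcode>, float %src)
// where the leading i32 is the DXIL opcode constant that every dx.op.* call
// takes as its first argument.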
bool ParseContext::emit_constant(const ir::Op &op)
{
	auto &type = op.getType();
	Value *value = nullptr;

	if (type.isBasicType())
	{
		auto *llvm_type = convert_type(type);
		if (type.isScalarType())
		{
			if (type.getBaseType(0).isIntType())
				value = ConstantInt::get(llvm_type, uint64_t(op.getOperand(0)));
			else if (type.getBaseType(0).isFloatType())
				value = ConstantFP::get(llvm_type, uint64_t(op.getOperand(0)));
			else if (type.getBaseType(0).isBoolType())
				value = ConstantInt::get(llvm_type, bool(op.getOperand(0)));
			else
				return false;
		}
		else
		{
			Vector<Constant *> constants;
			constants.reserve(op.getOperandCount());
			auto *llvm_sub_type = get_scalar_type(llvm_type);
			if (type.getBaseType(0).isIntType())
			{
				for (uint32_t i = 0; i < op.getOperandCount(); i++)
					constants.push_back(ConstantInt::get(llvm_sub_type, uint64_t(op.getOperand(i))));
			}
			else if (type.getBaseType(0).isFloatType())
			{
				for (uint32_t i = 0; i < op.getOperandCount(); i++)
					constants.push_back(ConstantFP::get(llvm_sub_type, uint64_t(op.getOperand(i))));
			}
			else
				return false;

			value = context.construct<ConstantAggregate>(convert_type(type), std::move(constants));
		}
	}
	else if (type.isArrayType())
	{
		// This is quite flexible, but only support what we can reasonably expect to see.
		// Extend and generalize if needed.
		auto elem_type = type.getSubType(0);
		if (!elem_type.isScalarType() && !elem_type.isVectorType())
			return false;

		uint32_t vecsize = elem_type.getBaseType(0).getVectorSize();
		assert(vecsize && op.getOperandCount() % vecsize == 0);
		uint32_t array_elements = op.getOperandCount() / vecsize;

		Vector<Constant *> constants;
		Vector<Constant *> values;
		values.reserve(array_elements);
		constants.reserve(vecsize);
		auto *llvm_sub_type = convert_type(elem_type);

		for (uint32_t elem = 0; elem < array_elements; elem++)
		{
			constants.clear();
			for (uint32_t c = 0; c < vecsize; c++)
			{
				if (elem_type.getBaseType(0).isIntType())
					constants.push_back(ConstantInt::get(get_scalar_type(llvm_sub_type), uint64_t(op.getOperand(elem * vecsize + c))));
				else if (elem_type.getBaseType(0).isFloatType())
					constants.push_back(ConstantFP::get(get_scalar_type(llvm_sub_type), uint64_t(op.getOperand(elem * vecsize + c))));
				else
					return false;
			}

			if (elem_type.isVectorType())
				values.push_back(context.construct<ConstantAggregate>(convert_type(elem_type), constants));
			else
				values.push_back(constants[0]);
		}

		auto *constant_value = context.construct<ConstantAggregate>(convert_type(op.getType()), values);
		auto *lut = context.construct<GlobalVariable>(
		    PointerType::get(convert_type(op.getType()), uint32_t(DXIL::AddressSpace::Thread)),
		    GlobalVariable::LinkageTypes::InternalLinkage, false);
		lut->set_initializer(constant_value);
		module.add_global_variable(lut);
		value = lut;
	}

	if (!value)
		return false;
	value_map[op.getDef()] = value;
	return true;
}

Type *ParseContext::convert_type(const ir::Type &type)
{
	if (type.isArrayType())
	{
		auto *llvm_type = convert_type(type.getSubType(0));
		for (unsigned dim = 0; dim < type.getArrayDimensions(); dim++)
			llvm_type = ArrayType::get(llvm_type, type.getArraySize(dim));
		return llvm_type;
	}
	else if (type.isStructType())
	{
		Vector<Type *> members;
		for (unsigned index = 0; index < type.getStructMemberCount(); index++)
			members.push_back(convert_type(type.getSubType(index)));
		return StructType::get(context, std::move(members));
	}
	else if (type.isVoidType())
	{
		return Type::getVoidTy(context);
	}
	else if (type.isBasicType())
	{
		Type *llvm_type;
		ir::BasicType base = type.getBaseType(0);
		switch (base.getBaseType())
		{
		case ir::ScalarType::eF16: llvm_type = Type::getHalfTy(context); break;
		case ir::ScalarType::eF32: llvm_type = Type::getFloatTy(context); break;
		case ir::ScalarType::eF64: llvm_type = Type::getDoubleTy(context); break;
		case ir::ScalarType::eI16:
		case ir::ScalarType::eU16: llvm_type = Type::getInt16Ty(context); break;
		case ir::ScalarType::eI32:
		case ir::ScalarType::eU32: llvm_type = Type::getInt32Ty(context); break;
		case ir::ScalarType::eI64:
		case ir::ScalarType::eU64: llvm_type = Type::getInt64Ty(context); break;
		case ir::ScalarType::eBool: llvm_type = Type::getInt1Ty(context); break;
		default:
			LOGE("Unrecognized basic scalar type %u\n", unsigned(base.getBaseType()));
			return nullptr;
		}

		if (base.isVector())
			llvm_type = VectorType::get(base.getVectorSize(), llvm_type);
		return llvm_type;
	}
	else
	{
		LOGE("Unrecognized type.\n");
		return nullptr;
	}
}

void ParseContext::push_instruction(Instruction *instruction, ir::SsaDef ssa)
{
	assert(current_bb);
	instruction->set_tween_id(++tween_id);
	current_bb->add_instruction(instruction);
	if (ssa)
		value_map[ssa] = instruction;
}
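// Stage IO addressing convention, as consumed below: the address vector is
// [axis,] [row,] column, where the control-point axis only exists for arrayed
// hull/domain IO, the row only for multi-row (arrayed) registers, and the
// trailing column must be a compile-time constant.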
ParseContext::StageIOAccess ParseContext::build_stage_io_access(const StageIOHandler &handler,
                                                                ir::SsaDef io_decl, ir::SsaDef addr)
{
	Value *axis = nullptr;
	Value *row = get_constant_uint(0);
	uint32_t col = 0;

	if (addr)
	{
		auto &decl = builder.getOp(io_decl);
		auto &addr_op = builder.getOp(addr);
		uint32_t chain_length = addr_op.getType().getBaseType(0).getVectorSize();
		uint32_t dim = 0;
		auto *addr_value = get_value(addr);

		if (handler.need_axis)
			axis = get_extracted_composite_component(addr_value, dim++);
		if (dim + 1 == decl.getType().getArrayDimensions())
			row = get_extracted_composite_component(addr_value, dim++);

		// This is optional if we're loading from a scalar, or we're loading the full vector.
		if (dim < chain_length)
		{
			assert(dim == chain_length - 1);
			// The last element is the column. It must be constant.
			if (const auto *c = dyn_cast<ConstantInt>(get_extracted_composite_component(addr_value, dim)))
			{
				col = c->getUniqueInteger().getZExtValue();
			}
			else
			{
				LOGE("Column index is not compile-time constant.\n");
				return {};
			}
		}
	}

	return { axis, row, col };
}

static bool io_decl_is_patch(ir::ShaderStage stage, const ir::Op &op)
{
	if (stage != ir::ShaderStage::eHull && stage != ir::ShaderStage::eDomain)
		return false;

	switch (op.getOpCode())
	{
	case ir::OpCode::eDclInput:
		return stage == ir::ShaderStage::eDomain && !op.getType().isArrayType();
	case ir::OpCode::eDclOutput:
		return stage == ir::ShaderStage::eHull && !op.getType().isArrayType();
	case ir::OpCode::eDclOutputBuiltIn:
		if (stage == ir::ShaderStage::eDomain)
			return false;
		break;
	default:
		break;
	}

	// For builtin IO, there are tess factors and clip/cull distances that are a bit "special".
	auto builtin = ir::BuiltIn(op.getOperand(1));
	if (builtin == ir::BuiltIn::eTessCoord)
		return false;
	return builtin == ir::BuiltIn::eTessFactorOuter ||
	       builtin == ir::BuiltIn::eTessFactorInner ||
	       !op.getType().isArrayType();
}

bool ParseContext::build_input_load(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];

	// Redirect to magic opcode as needed.
	if (ref.op != DXIL::Op::Count)
	{
		auto *inst = build_load_builtin(ref.op, ir::SsaDef(op.getOperand(1)));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_load_input(ref.index, scalar_type, access.row, access.col + c, access.axis, patch);
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_output_load(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_load_output(ref.index, scalar_type, access.row, access.col + c, access.axis, patch);
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}
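// DXIL stage IO intrinsics are scalar, so vector loads are emitted one
// component at a time and then reassembled into the vector value the ir
// expects; the interpolation helpers below follow the same pattern.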
bool ParseContext::build_interpolate_at_centroid(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::EvalCentroid, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_interpolate_at_sample(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::EvalSampleIndex, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c),
		                           get_value(op.getOperand(2)));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_interpolate_at_offset(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::ExtendedEvalSnapped, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c),
		                           get_value(op.getOperand(2)));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_output_store(const ir::Op &op)
{
	auto *store_value = get_value(op.getOperand(2));
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(store_value->getType()))
		components = vec->getVectorSize();

	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	if (components == 1)
	{
		push_instruction(build_store_output(ref.index, access.row, access.col, store_value, patch));
	}
	else
	{
		for (unsigned c = 0; c < components; c++)
		{
			auto *value = get_extracted_composite_component(store_value, c);
			push_instruction(build_store_output(ref.index, access.row, access.col + c, value, patch));
		}
	}
	return true;
}
bool ParseContext::build_gep_load(const ir::Op &op)
{
	auto *type = convert_type(op.getType());
	Vector<Value *> args;
	args.push_back(get_value(op.getOperand(0)));
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	// LDS lives in the group-shared address space; scratch and constant data in the thread-local one.
	auto addr_space = op.getOpCode() == ir::OpCode::eLdsLoad ?
	                  DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread;
	auto *gep = context.construct<GetElementPtrInst>(
	    PointerType::get(type, uint32_t(addr_space)), std::move(args), true);
	auto *load = context.construct<LoadInst>(type, gep);
	push_instruction(gep);
	push_instruction(load, op.getDef());
	return true;
}

bool ParseContext::build_gep_store(const ir::Op &op)
{
	auto *type = convert_type(builder.getOp(ir::SsaDef(op.getOperand(2))).getType());
	Vector<Value *> args;
	args.push_back(get_value(op.getOperand(0)));
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	auto addr_space = op.getOpCode() == ir::OpCode::eLdsStore ?
	                  DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread;
	auto *gep = context.construct<GetElementPtrInst>(
	    PointerType::get(type, uint32_t(addr_space)), std::move(args), true);
	auto *store = context.construct<StoreInst>(gep, get_value(op.getOperand(2)));
	push_instruction(gep);
	push_instruction(store, op.getDef());
	return true;
}

bool ParseContext::build_composite_construct(const ir::Op &op)
{
	auto *type = convert_type(op.getType());
	Vector<Value *> values;
	values.reserve(op.getOperandCount());
	for (unsigned i = 0; i < op.getOperandCount(); i++)
		values.push_back(get_value(op.getOperand(i)));
	auto *inst = context.construct<CompositeConstructInst>(type, std::move(values));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_composite_extract(const ir::Op &op)
{
	auto &address = builder.getOpForOperand(op, 1);
	if (!address.isConstant())
	{
		LOGE("CompositeExtract must take a constant index.\n");
		return false;
	}

	auto *value = get_value(op.getOperand(0));
	for (unsigned i = 0; i < address.getOperandCount(); i++)
		value = get_extracted_composite_component(value, uint32_t(address.getOperand(i)));
	value_map[op.getDef()] = value;
	return true;
}

bool ParseContext::build_composite_insert(const ir::Op &op)
{
	auto *inst = context.construct<InsertElementInst>(
	    get_value(op.getOperand(0)), get_value(op.getOperand(2)), get_value(op.getOperand(1)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_descriptor_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &dcl_op = builder.getOp(descriptor);
	// For counter declarations, load the descriptor of the UAV they belong to.
	if (dcl_op.getOpCode() == ir::OpCode::eDclUavCounter)
		descriptor = ir::SsaDef(dcl_op.getOperand(1));

	auto *inst = build_descriptor_load(descriptor, ir::SsaDef(op.getOperand(1)),
	                                   bool(op.getFlags() & ir::OpFlag::eNonUniform));
	if (!inst)
		return false;
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_deriv(const ir::Op &op)
{
	auto *inst = build_dxil_call(DXIL::Op::ExtendedDeriv,
	                             convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)),
	                             get_constant_uint(op.getOpCode() == ir::OpCode::eDerivY),
	                             get_constant_uint(uint32_t(op.getOperand(1))));
	push_instruction(inst, op.getDef());
	return true;
}
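// CheckAccessFullyMapped consumes the residency status produced by sparse
// resource accesses; the ir op is forwarded more or less verbatim.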
bool ParseContext::build_check_sparse_access(const ir::Op &op)
{
	auto *inst = build_dxil_call(DXIL::Op::CheckAccessFullyMapped,
	                             convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_fround(const ir::Op &op)
{
	auto dxop = convert_round_mode(ir::RoundMode(op.getOperand(op.getFirstLiteralOperandIndex())));
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_frcp(const ir::Op &op)
{
	Constant *const1;
	// ConstantFP::get() takes the raw bit pattern of the float here.
	switch (op.getType().getBaseType(0).getBaseType())
	{
	case ir::ScalarType::eF16:
		// 0x3c00 is 1.0 in FP16.
		const1 = ConstantFP::get(Type::getHalfTy(context), 0x3c00);
		break;

	case ir::ScalarType::eF32:
	{
		const float one = 1.0f;
		uint32_t v;
		memcpy(&v, &one, sizeof(one));
		const1 = ConstantFP::get(Type::getFloatTy(context), v);
		break;
	}

	case ir::ScalarType::eF64:
	{
		const double one = 1.0;
		uint64_t v;
		memcpy(&v, &one, sizeof(one));
		const1 = ConstantFP::get(Type::getDoubleTy(context), v);
		break;
	}

	default:
		return false;
	}

	if (op.getType().isVectorType())
	{
		unsigned num_components = op.getType().getBaseType(0).getVectorSize();
		Vector<Constant *> values;
		values.reserve(num_components);
		for (unsigned i = 0; i < num_components; i++)
			values.push_back(const1);
		const1 = context.construct<ConstantAggregate>(VectorType::get(num_components, const1->getType()), std::move(values));
	}

	auto *inst = context.construct<BinaryOperator>(const1, get_value(op.getOperand(0)), BinaryOperator::BinaryOps::FDiv);
	inst->setFast(!((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_binary_op(const ir::Op &op, BinaryOperator::BinaryOps binop)
{
	auto *inst = context.construct<BinaryOperator>(get_value(op.getOperand(0)), get_value(op.getOperand(1)), binop);
	push_instruction(inst, op.getDef());
	if (op.getType().getBaseType(0).isFloatType())
		inst->setFast(!((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise));
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_unary(const ir::Op &op)
{
	assert(op.getOperandCount() == 1);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_constant_unary(const ir::Op &op)
{
	assert(op.getOperandCount() == 1);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_constant_uint(uint32_t(op.getOperand(0))));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_binary(const ir::Op &op)
{
	assert(op.getOperandCount() == 2);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}
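// Precision handling in these helpers: plain binary ops get LLVM fast-math
// flags when the ir op (or the module-wide default) is not marked ePrecise,
// while dx.op intrinsic calls are tagged with dx.precise metadata when it is.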
template <DXIL::Op dxop>
bool ParseContext::build_dxil_trinary(const ir::Op &op)
{
	assert(op.getOperandCount() == 3);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)), get_value(op.getOperand(2)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_quaternary(const ir::Op &op)
{
	assert(op.getOperandCount() == 4);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)),
	                             get_value(op.getOperand(2)), get_value(op.getOperand(3)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_barrier(const ir::Op &op)
{
	auto exec_scope = ir::Scope(op.getOperand(0));
	auto mem_scope = ir::Scope(op.getOperand(1));
	auto memory_type = ir::MemoryTypeFlags(op.getOperand(2));
	auto *void_type = Type::getVoidTy(context);

	uint32_t memory_flags = 0;
	uint32_t semantic_flags = 0;
	semantic_flags |= DXIL::GroupScopeBit;
	if (exec_scope != ir::Scope::eThread)
		semantic_flags |= DXIL::GroupSyncBit;
	if (mem_scope == ir::Scope::eGlobal)
		semantic_flags |= DXIL::DeviceScopeBit;
	if (memory_type & ir::MemoryType::eLds)
		memory_flags |= DXIL::MemoryTypeGroupSharedBit;
	if (memory_type & ir::MemoryType::eUav)
		memory_flags |= DXIL::MemoryTypeUavBit;

	auto *inst = build_dxil_call(DXIL::Op::BarrierByMemoryType, void_type, void_type,
	                             get_constant_uint(memory_flags), get_constant_uint(semantic_flags));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_demote(const ir::Op &op)
{
	auto *void_type = Type::getVoidTy(context);
	auto *inst = build_dxil_call(DXIL::Op::Discard, void_type, void_type, get_constant_uint(1));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::push_instruction(const ir::Op &op)
{
	switch (op.getOpCode())
	{
#define OPMAP(irop, llvmop) \
	case ir::OpCode::e##irop: \
		if (!build_##llvmop(op)) \
			return false; \
		break
		OPMAP(InputLoad, input_load);
		OPMAP(OutputLoad, output_load);
		OPMAP(OutputStore, output_store);
		OPMAP(CompositeConstruct, composite_construct);
		OPMAP(CompositeExtract, composite_extract);
		OPMAP(CompositeInsert, composite_insert);
		OPMAP(DescriptorLoad, descriptor_load);
		OPMAP(BufferLoad, buffer_load);
		OPMAP(BufferStore, buffer_store);
		OPMAP(BufferAtomic, buffer_atomic);
		OPMAP(CounterAtomic, counter_atomic);
		OPMAP(BufferQuerySize, buffer_query_size);
		OPMAP(ImageLoad, image_load);
		OPMAP(ImageStore, image_store);
		OPMAP(ImageAtomic, image_atomic);
		OPMAP(ImageQuerySize, image_query_size);
		OPMAP(ImageQueryMips, image_query_mips_samples);
		OPMAP(ImageQuerySamples, image_query_mips_samples);
		OPMAP(ImageSample, image_sample);
		OPMAP(ImageGather, image_gather);
		OPMAP(ImageComputeLod, image_compute_lod);
		OPMAP(DerivX, deriv);
		OPMAP(DerivY, deriv);
		OPMAP(CheckSparseAccess, check_sparse_access);
		OPMAP(FRound, fround);
		OPMAP(FAbs, dxil_unary);
		OPMAP(IAbs, dxil_unary);
		OPMAP(FMad, dxil_trinary);
		OPMAP(FRcp, frcp);
		OPMAP(FFract, dxil_unary);
		OPMAP(FMin, dxil_binary);
		OPMAP(FMax, dxil_binary);
		OPMAP(SMin, dxil_binary);
		OPMAP(SMax, dxil_binary);
		OPMAP(UMin, dxil_binary);
		OPMAP(UMax, dxil_binary);
		OPMAP(FClamp, dxil_trinary);
		OPMAP(SClamp, dxil_trinary);
		OPMAP(UClamp, dxil_trinary);
		OPMAP(FLog2, dxil_unary);
		OPMAP(FExp2, dxil_unary);
		OPMAP(FSin, dxil_unary);
		OPMAP(FCos, dxil_unary);
		OPMAP(FSqrt, dxil_unary);
		OPMAP(FRsq, dxil_unary);
		OPMAP(FPow, dxil_binary);
		OPMAP(FIsNan, dxil_unary);
		OPMAP(ConvertF32toPackedF16, dxil_unary);
		OPMAP(ConvertPackedF16toF32, dxil_unary);
	OPMAP(InterpolateAtCentroid, interpolate_at_centroid);
	OPMAP(InterpolateAtSample, interpolate_at_sample);
	OPMAP(InterpolateAtOffset, interpolate_at_offset);
	OPMAP(UBitExtract, dxil_trinary);
	OPMAP(SBitExtract, dxil_trinary);
	OPMAP(IBitInsert, dxil_quaternary);
	OPMAP(EmitVertex, dxil_constant_unary);
	OPMAP(EmitPrimitive, dxil_constant_unary);
	OPMAP(IBitCount, dxil_unary);
	OPMAP(IBitReverse, dxil_unary);
	OPMAP(IFindLsb, dxil_unary);
	OPMAP(SFindMsb, dxil_unary);
	OPMAP(UFindMsb, dxil_unary);
	OPMAP(IAddCarry, dxil_binary);
	OPMAP(ISubBorrow, dxil_binary);
	OPMAP(SMulExtended, dxil_binary);
	OPMAP(UMulExtended, dxil_binary);
	OPMAP(ScratchLoad, gep_load);
	OPMAP(ScratchStore, gep_store);
	OPMAP(LdsLoad, gep_load);
	OPMAP(LdsStore, gep_store);
	OPMAP(ConstantLoad, gep_load);
	OPMAP(Barrier, barrier);
	OPMAP(LdsAtomic, lds_atomic);
	OPMAP(Demote, demote);
#undef OPMAP

	// Plain instructions
	case ir::OpCode::eCast:
	{
		if (convert_type(op.getType()) == convert_type(builder.getOp(ir::SsaDef(op.getOperand(0))).getType()))
		{
			// I <-> U casts are meaningless.
			value_map[op.getDef()] = get_value(op.getOperand(0));
		}
		else
		{
			push_instruction(context.construct<CastInst>(convert_type(op.getType()), get_value(op.getOperand(0)),
			                                             Instruction::CastOps::BitCast),
			                 op.getDef());
		}
		break;
	}

	case ir::OpCode::eSelect:
	{
		push_instruction(context.construct<SelectInst>(get_value(op.getOperand(1)), get_value(op.getOperand(2)),
		                                               get_value(op.getOperand(0))),
		                 op.getDef());
		break;
	}

	case ir::OpCode::eFNeg:
	case ir::OpCode::eINeg:
	{
		push_instruction(context.construct<UnaryOperator>(
		                     op.getOpCode() == ir::OpCode::eFNeg ? UnaryOperator::UnaryOps::FNeg : UnaryOperator::UnaryOps::INeg,
		                     get_value(op.getOperand(0))),
		                 op.getDef());
		break;
	}

#define CMP(irop, type, llvmop) \
	case ir::OpCode::irop: \
		push_instruction(context.construct<type##CmpInst>( \
			CmpInst::Predicate::llvmop, \
			get_value(op.getOperand(0)), \
			get_value(op.getOperand(1))), \
			op.getDef()); break
	CMP(eFNe, F, FCMP_UNE);
	CMP(eFEq, F, FCMP_OEQ);
	CMP(eFGt, F, FCMP_OGT);
	CMP(eFGe, F, FCMP_OGE);
	CMP(eFLt, F, FCMP_OLT);
	CMP(eFLe, F, FCMP_OLE);
	CMP(eINe, I, ICMP_NE);
	CMP(eIEq, I, ICMP_EQ);
	CMP(eBNe, I, ICMP_NE);
	CMP(eBEq, I, ICMP_EQ);
	CMP(eSGt, I, ICMP_SGT);
	CMP(eSGe, I, ICMP_SGE);
	CMP(eSLt, I, ICMP_SLT);
	CMP(eSLe, I, ICMP_SLE);
	CMP(eUGt, I, ICMP_UGT);
	CMP(eUGe, I, ICMP_UGE);
	CMP(eULt, I, ICMP_ULT);
	CMP(eULe, I, ICMP_ULE);

#define BOP(irop, llvmop) case ir::OpCode::irop: if (!build_binary_op(op, BinaryOperator::BinaryOps::llvmop)) return false; break
	BOP(eFAdd, FAdd);
	BOP(eFSub, FSub);
	BOP(eFMul, FMul);
	BOP(eFDiv, FDiv);
	BOP(eIAdd, Add);
	BOP(eISub, Sub);
	BOP(eIMul, Mul);
	BOP(eUDiv, UDiv);
	BOP(eUMod, URem);
	BOP(eIAnd, And);
	BOP(eIOr, Or);
	BOP(eIXor, Xor);
	BOP(eBAnd, And);
	BOP(eBOr, Or);
	BOP(eIShl, Shl);
	BOP(eUShr, LShr);
	BOP(eSShr, AShr);
#undef BOP

	case ir::OpCode::eConvertFtoF:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		if (out_type.byteSize() == in_type.byteSize())
		{
			value_map[op.getDef()] = get_value(op.getOperand(0));
			break;
		}

		bool ext = out_type.byteSize() > in_type.byteSize();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
		                                         ext ? Instruction::CastOps::FPExt : Instruction::CastOps::FPTrunc);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertFtoI:
	{
		auto &out_type = op.getType();
		bool is_signed = out_type.getBaseType(0).isSignedIntType();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)), is_signed ?
		                                         Instruction::CastOps::FPToSI : Instruction::CastOps::FPToUI);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertItoF:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		bool is_signed = in_type.getBaseType(0).isSignedIntType();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
		                                         is_signed ? Instruction::CastOps::SIToFP : Instruction::CastOps::UIToFP);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertItoI:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		bool is_signed = in_type.getBaseType(0).isSignedIntType();
		bool ext = out_type.byteSize() > in_type.byteSize();

		if (out_type.byteSize() == in_type.byteSize())
		{
			value_map[op.getDef()] = get_value(op.getOperand(0));
			break;
		}

		if (!ext)
		{
			auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)), Instruction::CastOps::Trunc);
			push_instruction(inst, op.getDef());
		}
		else
		{
			auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
			                                         is_signed ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt);
			push_instruction(inst, op.getDef());
		}
		break;
	}

	case ir::OpCode::eINot:
	case ir::OpCode::eBNot:
	{
		auto *result_type = convert_type(op.getType());
		auto *scalar_type = result_type;
		Constant *constant_max;

		if (const auto *vec = llvm::dyn_cast<VectorType>(result_type))
		{
			scalar_type = vec->getElementType();
			constant_max = ConstantInt::get(scalar_type, UINT64_MAX);
			// Only vec2 is supported.
			constant_max = context.construct(result_type, Vector{ constant_max, constant_max });
		}
		else
		{
			constant_max = ConstantInt::get(scalar_type, UINT64_MAX);
		}

		auto *inst = context.construct<BinaryOperator>(get_value(op.getOperand(0)), constant_max, Instruction::BinaryOps::Xor);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eFunctionCall:
	{
		auto itr = function_map.find(ir::SsaDef(op.getOperand(0)));
		if (itr == function_map.end())
			return false;
		auto *func = itr->second;

		Vector<Value *> args;
		args.reserve(op.getOperandCount() - 1);
		for (uint32_t i = 1; i < op.getOperandCount(); i++)
			args.push_back(get_value(op.getOperand(i)));

		push_instruction(context.construct<CallInst>(func->getFunctionType(), func, std::move(args)), op.getDef());
		break;
	}

	default:
		LOGE("Unimplemented opcode %u\n", unsigned(op.getOpCode()));
		return false;
	}

	return true;
}

Instruction *ParseContext::build_load_input(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch)
{
	assert(index != UINT32_MAX);
	auto *inst = build_dxil_call(patch ? DXIL::Op::LoadPatchConstant : DXIL::Op::ExtendedSpirvLoadInput, type, type,
	                             get_constant_uint(index), row, get_constant_uint(col),
	                             axis ? axis : UndefValue::get(Type::getInt32Ty(context)));
	return inst;
}

Instruction *ParseContext::build_load_output(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch)
{
	assert(index != UINT32_MAX);
	Instruction *inst;

	// This is slightly extended internally to allow loading outputs in general.
	if (patch)
	{
		inst = build_dxil_call(DXIL::Op::LoadPatchConstant, type, type,
		                       get_constant_uint(index), row, get_constant_uint(col));
	}
	else
	{
		inst = build_dxil_call(DXIL::Op::LoadOutputControlPoint, type, type,
		                       get_constant_uint(index), row, get_constant_uint(col), axis ?
		                       axis : UndefValue::get(Type::getInt32Ty(context)));
	}

	return inst;
}

Instruction *ParseContext::build_store_output(uint32_t index, Value *row, uint32_t col, Value *value, bool patch)
{
	assert(index != UINT32_MAX);
	auto *inst = build_dxil_call(patch ? DXIL::Op::StorePatchConstant : DXIL::Op::StoreOutput,
	                             Type::getVoidTy(context), value->getType(),
	                             get_constant_uint(index), row, get_constant_uint(col), value);
	return inst;
}

Instruction *ParseContext::build_load_builtin(DXIL::Op opcode, ir::SsaDef addr)
{
	Type *type;
	if (opcode == DXIL::Op::InnerCoverage)
		type = Type::getInt1Ty(context);
	else
		type = Type::getInt32Ty(context);

	if (addr)
		return build_dxil_call(opcode, type, nullptr, get_value(addr));
	else
		return build_dxil_call(opcode, type, nullptr);
}

Value *ParseContext::get_extracted_composite_component(Value *value, unsigned component)
{
	if (!isa<VectorType>(value->getType()) && !isa<StructType>(value->getType()))
	{
		assert(component == 0);
		return value;
	}

	// Common pattern where composites are constructed only to be extracted again.
	if (const auto *comp = dyn_cast(value))
		return comp->getOperand(component);
	if (const auto *vec = dyn_cast<ConstantDataVector>(value))
		return vec->getElementAsConstant(component);

	ExtractValueInst *extracted;
	if (const auto *vec_type = dyn_cast<VectorType>(value->getType()))
		extracted = context.construct<ExtractValueInst>(vec_type->getElementType(), value, Vector{ component });
	else if (const auto *struct_type = dyn_cast<StructType>(value->getType()))
		extracted = context.construct<ExtractValueInst>(struct_type->getStructElementType(component), value, Vector{ component });
	else
		return nullptr;

	push_instruction(extracted);
	return extracted;
}

Value *ParseContext::get_constant_mul(Value *value, uint32_t scale)
{
	// If there is already a multiplier, fold it in to help dxil-spirv analysis get proper vectorization.
	if (const auto *cint = dyn_cast<ConstantInt>(value))
	{
		return get_constant_uint(cint->getUniqueInteger().getZExtValue() * scale);
	}
	else if (const auto *bop = dyn_cast<BinaryOperator>(value))
	{
		if (bop->getOpcode() == BinaryOperator::BinaryOps::Mul)
		{
			auto *ca = dyn_cast<ConstantInt>(bop->getOperand(0));
			auto *cb = dyn_cast<ConstantInt>(bop->getOperand(1));
			if (ca && cb)
			{
				return get_constant_uint(ca->getUniqueInteger().getZExtValue() *
				                         cb->getUniqueInteger().getZExtValue() * scale);
			}
			else if (ca || cb)
			{
				auto *c = ca ? ca : cb;
				auto *other = bop->getOperand(ca ? 1 : 0);
				auto *inst = context.construct<BinaryOperator>(
					get_constant_uint(c->getUniqueInteger().getZExtValue() * scale),
					other, BinaryOperator::BinaryOps::Mul);
				push_instruction(inst);
				return inst;
			}
		}
		else if (bop->getOpcode() == BinaryOperator::BinaryOps::Add)
		{
			if (isa<ConstantInt>(bop->getOperand(0)) || isa<ConstantInt>(bop->getOperand(1)))
			{
				// Avoid nested scaling. Scale each side. Probably only worth it if at least one of them is a constant.
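				// e.g. with scale = 16: (x * 2 + 4) becomes (x * 32 + 64) rather than
				// ((x * 2 + 4) * 16), which keeps the byte offset in a shape the
				// downstream vectorization analysis can pattern-match.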
				auto *scaled_a = get_constant_mul(bop->getOperand(0), scale);
				auto *scaled_b = get_constant_mul(bop->getOperand(1), scale);
				auto *inst = context.construct<BinaryOperator>(scaled_a, scaled_b, BinaryOperator::BinaryOps::Add);
				push_instruction(inst);
				return inst;
			}
		}
	}

	auto *inst = context.construct<BinaryOperator>(get_constant_uint(scale), value, BinaryOperator::BinaryOps::Mul);
	push_instruction(inst);
	return inst;
}

static VectorType *get_vec4_variant(Type *type)
{
	if (auto *vec = dyn_cast<VectorType>(type))
	{
		if (vec->getVectorSize() == 4)
			return vec;
		else
			return VectorType::get(4, vec->getElementType());
	}
	else
		return VectorType::get(4, type);
}

static StructType *get_sparse_feedback_variant(Type *type)
{
	auto *scalar_type = get_scalar_type(type->getStructElementType(1));
	return StructType::get(type->getContext(),
	                       { scalar_type, scalar_type, scalar_type, scalar_type,
	                         Type::getInt32Ty(type->getContext()) });
}

static Type *get_composite_return_type(Type *type)
{
	if (isa<StructType>(type))
		return get_sparse_feedback_variant(type);
	else
		return get_vec4_variant(type);
}

Instruction *ParseContext::build_extract_composite(const ir::Op &op, Value *value, unsigned num_elements)
{
	if (!num_elements)
		num_elements = op.getType().getBaseType(0).getVectorSize();

	if (num_elements == 1)
		return context.construct<ExtractValueInst>(get_scalar_type(value->getType()), value, Vector{ 0 });

	Value *values[4];
	for (unsigned c = 0; c < num_elements; c++)
		values[c] = get_extracted_composite_component(value, c);

	assert(num_elements > 1);
	auto *result_type = VectorType::get(num_elements, get_scalar_type(value->getType()));
	auto *comp = context.construct(result_type, Vector{ values, values + num_elements });
	return comp;
}

bool ParseContext::build_buffer_load_return_composite(const ir::Op &op, Value *value)
{
	if (op.getType().isStructType())
	{
		// Sparse feedback.
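		// Sparse loads return { residency code, data } in the ir; the DXIL side packs it as
		// 4 scalars + i32 status (see get_sparse_feedback_variant() above), so element 4 is
		// the residency code and the leading elements hold the loaded data.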
		auto *code = get_extracted_composite_component(value, 4);
		auto *sampled_value = build_extract_composite(op, value, op.getType().getBaseType(1).getVectorSize());
		push_instruction(sampled_value);
		auto *inst = context.construct(convert_type(op.getType()), Vector<Value *>{ code, sampled_value });
		push_instruction(inst, op.getDef());
		return true;
	}
	else
	{
		unsigned num_elements = op.getType().getBaseType(0).getVectorSize();
		if (num_elements != 1)
			push_instruction(build_extract_composite(op, value, num_elements), op.getDef());
		else
			value_map[op.getDef()] = get_extracted_composite_component(value, 0);
	}

	return true;
}

bool ParseContext::build_buffer_load(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(DXIL::Op::BufferLoad, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), first, second);
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_buffer_load_cbv(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto addr = ir::SsaDef(op.getOperand(1));
	Instruction *inst = nullptr;

	if (op.getType().isScalarType())
	{
		if (op.getType().byteSize() != 4)
		{
			LOGE("Only support 4 byte scalar CBV loads.\n");
			return false;
		}

		auto *result_type = convert_type(op.getType());
		auto *addr_value = get_value(addr);
		if (!llvm::isa<VectorType>(addr_value->getType()))
		{
			LOGE("Expected a vector type addr for vectors.\n");
			return false;
		}

		auto *index16 = get_extracted_composite_component(addr_value, 0);
		auto *index4 = get_extracted_composite_component(addr_value, 1);
		auto *mul16 = get_constant_mul(index16, 16);
		auto *mul4 = get_constant_mul(index4, 4);

		Value *byte_addr;
		if (isa<ConstantInt>(mul16) && isa<ConstantInt>(mul4))
		{
			byte_addr = get_constant_uint(cast<ConstantInt>(mul16)->getUniqueInteger().getZExtValue() +
			                              cast<ConstantInt>(mul4)->getUniqueInteger().getZExtValue());
		}
		else
		{
			auto *byte_addr_inst = context.construct<llvm::BinaryOperator>(mul16, mul4, llvm::BinaryOperator::BinaryOps::Add);
			push_instruction(byte_addr_inst);
			byte_addr = byte_addr_inst;
		}

		inst = build_dxil_call(DXIL::Op::CBufferLoad, result_type, result_type, get_value(descriptor), byte_addr);
	}
	else if (op.getType().isVectorType())
	{
		if (op.getType().getBaseType(0).getVectorSize() != 4 || op.getType().byteSize() != 16)
		{
			LOGE("We can only support vec4 or scalar loads from CBV.\n");
			return false;
		}

		auto *result_type = convert_type(op.getType());
		auto *addr_value = get_value(addr);
		inst = build_dxil_call(DXIL::Op::CBufferLoadLegacy, result_type, result_type, get_value(descriptor), addr_value);
	}

	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	// This function is overloaded, so need to figure out which type of load we should generate.
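	// CBVs go through the legacy 16-byte-row cbuffer load path above, everything else
	// through raw/structured BufferLoad with byte-scaled offsets.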
	if (itr->second.resource_type == DXIL::ResourceType::CBV)
		return build_buffer_load_cbv(op);
	else
		return build_buffer_load(op, itr->second.resource_kind);
}

bool ParseContext::build_image_store(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(1));
	auto coord = ir::SsaDef(op.getOperand(2));
	auto value = ir::SsaDef(op.getOperand(3));

	Value *coords[3] = {};
	Value *values[4] = {};
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	unsigned num_value_components = builder.getOp(value).getType().getBaseType(0).getVectorSize();
	auto *scalar_type = get_scalar_type(get_value(value)->getType());
	auto *coord_value = get_value(coord);

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(coord_value, c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getInt32Ty(context));

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	for (unsigned c = 0; c < num_value_components; c++)
		values[c] = get_extracted_composite_component(get_value(value), c);
	for (unsigned c = num_value_components; c < 4; c++)
		values[c] = UndefValue::get(scalar_type);

	unsigned mask = (1u << num_value_components) - 1u;

	auto *inst = build_dxil_call(DXIL::Op::TextureStore, Type::getVoidTy(context), scalar_type,
	                             get_value(descriptor), coords[0], coords[1], coords[2],
	                             values[0], values[1], values[2], values[3], get_constant_uint(mask));
	push_instruction(inst);
	return true;
}

bool ParseContext::build_image_atomic(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(1));
	auto coord = ir::SsaDef(op.getOperand(2));
	auto atomic_op = ir::AtomicOp(op.getOperand(op.getFirstLiteralOperandIndex()));

	Value *coords[3] = {};
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	auto *int_type = Type::getInt32Ty(context);

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(get_value(coord), c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getInt32Ty(context));

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = build_dxil_call(DXIL::Op::AtomicCompareExchange, int_type, int_type,
		                             get_value(descriptor), coords[0], coords[1], coords[2],
		                             get_extracted_composite_component(get_value(op.getOperand(3)), 0),
		                             get_extracted_composite_component(get_value(op.getOperand(3)), 1));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto binop = convert_atomic_binop(atomic_op);
	auto *return_type = convert_type(op.getType());
	Value *value;

	if (binop == DXIL::AtomicBinOp::Load)
	{
		value = UndefValue::get(int_type);
	}
	else if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
	{
		value = get_constant_uint(1);
	}
	else
	{
		value =
		    get_value(op.getOperand(3));
		if (binop != DXIL::AtomicBinOp::Store && op.getType().isVoidType())
			return_type = int_type;
	}

	auto *inst = build_dxil_call(DXIL::Op::AtomicBinOp, return_type, return_type,
	                             get_value(descriptor), get_constant_uint(uint32_t(binop)),
	                             coords[0], coords[1], coords[2], value);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_image_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	auto kind = itr->second.resource_kind;
	auto *int_type = Type::getInt32Ty(context);

	auto mip = ir::SsaDef(op.getOperand(1));
	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto sample = ir::SsaDef(op.getOperand(4));
	auto offset = ir::SsaDef(op.getOperand(5));

	Value *mip_or_sample = nullptr;
	Value *offsets[3] = {};
	Value *coords[3] = {};

	if (kind == DXIL::ResourceKind::TextureCube || kind == DXIL::ResourceKind::TextureCubeArray)
	{
		LOGE("Cubes not allowed for loads.\n");
		return false;
	}

	if (kind == DXIL::ResourceKind::Texture2DMS || kind == DXIL::ResourceKind::Texture2DMSArray)
		mip_or_sample = get_value(sample);
	else if (itr->second.resource_type == DXIL::ResourceType::SRV)
		mip_or_sample = get_value(mip);

	unsigned coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	for (unsigned c = 0; c < coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
	}

	if (kind == DXIL::ResourceKind::Texture1DArray ||
	    kind == DXIL::ResourceKind::Texture2DArray ||
	    kind == DXIL::ResourceKind::Texture2DMSArray)
	{
		coords[coord_components] = get_value(layer);
	}

	if (!mip_or_sample)
		mip_or_sample = UndefValue::get(int_type);
	for (auto &off : offsets)
		if (!off)
			off = UndefValue::get(int_type);
	for (auto &c : coords)
		if (!c)
			c = UndefValue::get(int_type);

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);

	auto *inst = build_dxil_call(DXIL::Op::TextureLoad, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), mip_or_sample,
	                             coords[0], coords[1], coords[2],
	                             offsets[0], offsets[1], offsets[2]);
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_query_size(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = VectorType::get(4, Type::getInt32Ty(context));
	auto kind = itr->second.resource_kind;

	auto *inst = build_dxil_call(DXIL::Op::GetDimensions, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), op.getOperand(1) ?
	                             get_value(op.getOperand(1)) : UndefValue::get(Type::getInt32Ty(context)));
	push_instruction(inst);

	unsigned num_dimensions = op.getType().getSubType(0).getBaseType(0).getVectorSize();
	auto *dims = build_extract_composite(op, inst, num_dimensions);
	push_instruction(dims);

	Value *layers;
	if (kind == DXIL::ResourceKind::Texture1DArray ||
	    kind == DXIL::ResourceKind::Texture2DArray ||
	    kind == DXIL::ResourceKind::Texture2DMSArray ||
	    kind == DXIL::ResourceKind::TextureCubeArray)
	{
		layers = get_extracted_composite_component(inst, num_dimensions);
	}
	else
	{
		layers = get_constant_uint(1);
	}

	inst = context.construct(result_type, Vector<Value *>{ dims, layers });
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_image_query_mips_samples(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *dxil_result_type = VectorType::get(4, Type::getInt32Ty(context));
	auto *inst = build_dxil_call(DXIL::Op::GetDimensions, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), get_constant_uint(0));
	push_instruction(inst);

	// Mips are encoded in the last structure element, for reasons.
	auto *value = get_extracted_composite_component(inst, 3);
	value_map[op.getDef()] = value;
	return true;
}

bool ParseContext::build_image_sample(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto offset = ir::SsaDef(op.getOperand(4));
	auto lod_index = ir::SsaDef(op.getOperand(5));
	auto lod_bias = ir::SsaDef(op.getOperand(6));
	auto lod_clamp = ir::SsaDef(op.getOperand(7));
	auto dx = ir::SsaDef(op.getOperand(8));
	auto dy = ir::SsaDef(op.getOperand(9));
	auto dref = ir::SsaDef(op.getOperand(10));

	auto opcode = DXIL::Op::Sample;
	if (lod_index)
		opcode = DXIL::Op::SampleLevel;
	else if (lod_bias)
		opcode = DXIL::Op::SampleBias;
	else if (dx && dy)
		opcode = DXIL::Op::SampleGrad;

	if (op.getType().isScalarType())
	{
		switch (opcode)
		{
		case DXIL::Op::Sample:
			opcode = DXIL::Op::SampleCmp;
			break;

		case DXIL::Op::SampleLevel:
			opcode = DXIL::Op::SampleCmpLevel;
			break;

		case DXIL::Op::SampleBias:
			opcode = DXIL::Op::SampleCmpBias;
			break;

		case DXIL::Op::SampleGrad:
			opcode = DXIL::Op::SampleCmpGrad;
			break;

		default:
			return false;
		}
	}

	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[4] = {};
	Value *offsets[3] = {};
	Value *ddx[3] = {};
	Value *ddy[3] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
		if (dx)
			ddx[c] = get_extracted_composite_component(get_value(dx), c);
		if (dy)
			ddy[c] = get_extracted_composite_component(get_value(dy), c);
	}

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
	case DXIL::ResourceKind::TextureCubeArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	Vector<Value *> values;
	values.push_back(get_value(image_desc));
	values.push_back(get_value(op.getOperand(1))); // sampler
	for (auto *c : coords)
		values.push_back(c ?
		                 c : UndefValue::get(Type::getFloatTy(context)));
	for (auto *o : offsets)
		values.push_back(o ? o : UndefValue::get(Type::getInt32Ty(context)));

	if (op.getType().isScalarType())
		values.push_back(get_value(dref));

	if (opcode == DXIL::Op::SampleGrad || opcode == DXIL::Op::SampleCmpGrad)
	{
		for (auto *d : ddx)
			values.push_back(d ? d : UndefValue::get(Type::getFloatTy(context)));
		for (auto *d : ddy)
			values.push_back(d ? d : UndefValue::get(Type::getFloatTy(context)));
	}

	if (opcode == DXIL::Op::SampleBias || opcode == DXIL::Op::SampleCmpBias)
		values.push_back(get_value(lod_bias));

	if (opcode != DXIL::Op::SampleLevel && opcode != DXIL::Op::SampleCmpLevel)
		values.push_back(lod_clamp ? get_value(lod_clamp) : UndefValue::get(Type::getFloatTy(context)));
	else if (lod_index)
		values.push_back(get_value(lod_index));

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(opcode, dxil_result_type, dxil_result_type, std::move(values));
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_gather(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto offset = ir::SsaDef(op.getOperand(4));
	auto dref = ir::SsaDef(op.getOperand(5));
	auto comp = uint32_t(op.getOperand(6));

	auto opcode = dref ? DXIL::Op::TextureGatherCmp : DXIL::Op::TextureGather;

	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[4] = {};
	Value *offsets[2] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
	}

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture2DArray:
	case DXIL::ResourceKind::TextureCubeArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	Vector<Value *> values;
	values.push_back(get_value(image_desc));
	values.push_back(get_value(op.getOperand(1))); // sampler
	for (auto *c : coords)
		values.push_back(c ? c : UndefValue::get(Type::getFloatTy(context)));
	for (auto *o : offsets)
		values.push_back(o ?
		                 o : UndefValue::get(Type::getInt32Ty(context)));
	values.push_back(get_constant_uint(comp));
	if (dref)
		values.push_back(get_value(dref));

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(opcode, dxil_result_type, dxil_result_type, std::move(values));
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_compute_lod(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto coord = ir::SsaDef(op.getOperand(2));
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[3] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(get_value(coord), c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getFloatTy(context));

	// Alternate extended formulation since DXIL is weird.
	auto *inst = build_dxil_call(DXIL::Op::ExtendedCalculateLOD, convert_type(op.getType()), nullptr,
	                             get_value(image_desc), get_value(op.getOperand(1)),
	                             coords[0], coords[1], coords[2]);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_store(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	// TODO: Adjust byte offset.
	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto *value = get_value(op.getOperand(2));
	Value *scalar_values[4];
	auto *scalar_type = value->getType();
	unsigned num_components = 1;

	if (const auto *vec = dyn_cast<VectorType>(scalar_type))
	{
		scalar_type = vec->getElementType();
		num_components = vec->getVectorSize();
	}

	unsigned mask = (1u << num_components) - 1u;
	for (unsigned c = 0; c < num_components; c++)
		scalar_values[c] = get_extracted_composite_component(value, c);
	for (unsigned c = num_components; c < 4; c++)
		scalar_values[c] = UndefValue::get(scalar_type);

	auto *inst = build_dxil_call(DXIL::Op::BufferStore, Type::getVoidTy(context), scalar_type,
	                             get_value(descriptor), first, second,
	                             scalar_values[0], scalar_values[1], scalar_values[2], scalar_values[3],
	                             get_constant_uint(mask));
	push_instruction(inst);
	return true;
}

bool ParseContext::build_buffer_store(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	return build_buffer_store(op, itr->second.resource_kind);
}

bool ParseContext::build_buffer_atomic_binop(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	// TODO: Adjust byte offset.
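	// Same addressing scheme as buffer loads/stores: structured buffers take
	// (element index, byte offset into element), raw buffers a plain byte offset,
	// hence the * 4 dword-to-byte scaling below.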
	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto atomic_op = ir::AtomicOp(op.getOperand(op.getFirstLiteralOperandIndex()));
	Value *value;
	auto *return_type = convert_type(op.getType());

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = build_dxil_call(DXIL::Op::AtomicCompareExchange, int_type, int_type,
		                             get_value(descriptor), first, second, UndefValue::get(int_type),
		                             get_extracted_composite_component(get_value(op.getOperand(2)), 0),
		                             get_extracted_composite_component(get_value(op.getOperand(2)), 1));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto binop = convert_atomic_binop(atomic_op);
	if (binop == DXIL::AtomicBinOp::Load)
	{
		value = UndefValue::get(int_type);
	}
	else if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
	{
		value = get_constant_uint(1);
	}
	else
	{
		value = get_value(op.getOperand(2));
		if (binop != DXIL::AtomicBinOp::Store && op.getType().isVoidType())
			return_type = int_type;
	}

	auto *inst = build_dxil_call(DXIL::Op::AtomicBinOp, return_type, return_type,
	                             get_value(descriptor), get_constant_uint(uint32_t(binop)),
	                             first, second, UndefValue::get(int_type), value);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_lds_atomic(const ir::Op &op)
{
	auto *lds = get_value(op.getOperand(0));
	Vector<Value *> args;
	args.push_back(lds);
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	Type *type;
	if (!op.getType().isVoidType())
		type = convert_type(op.getType());
	else
		type = convert_type(builder.getOp(ir::SsaDef(op.getOperand(2))).getType());

	auto *gep = context.construct<GetElementPtrInst>(
		PointerType::get(type, uint32_t(DXIL::AddressSpace::GroupShared)), std::move(args), true);
	push_instruction(gep);

	auto *value = get_value(op.getOperand(2));
	auto atomic_op = ir::AtomicOp(op.getOperand(3));

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = context.construct<AtomicCmpXchgInst>(
			gep,
			get_extracted_composite_component(value, 0),
			get_extracted_composite_component(value, 1), type);
		push_instruction(inst, op.getDef());
	}
	else
	{
		if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
			value = get_constant_uint(1);
		else if (atomic_op == ir::AtomicOp::eLoad)
			value = get_constant_uint(0);
		assert(value);

		auto *inst = context.construct<AtomicRMWInst>(type, gep, value, convert_atomic_binop_llvm(atomic_op));
		push_instruction(inst, op.getDef());
	}

	return true;
}

bool ParseContext::build_buffer_atomic(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	return build_buffer_atomic_binop(op, itr->second.resource_kind);
}

bool ParseContext::build_counter_atomic(const ir::Op &op)
{
	auto &load_desc_op = builder.getOp(ir::SsaDef(op.getOperand(0)));
	auto counter_descriptor = ir::SsaDef(load_desc_op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto &counter_resource_op = builder.getOp(counter_descriptor);
	auto descriptor =
	    ir::SsaDef(counter_resource_op.getOperand(1));
	auto itr = resource_map.find(descriptor);
	if (itr == resource_map.end())
		return false;

	auto *inst = build_dxil_call(DXIL::Op::BufferUpdateCounter, int_type, int_type,
	                             get_value(load_desc_op.getDef()),
	                             get_constant_uint(ir::AtomicOp(op.getOperand(1)) == ir::AtomicOp::eInc ? 1 : -1));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_query_size(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *result_type = convert_type(op.getType());
	auto *vec4_type = get_vec4_variant(result_type);

	// Fold in the mul + div into a plain OpArrayLength.
	auto *inst = build_dxil_call(DXIL::Op::ExtendedGetDimensions, vec4_type, nullptr,
	                             get_value(descriptor), UndefValue::get(Type::getInt32Ty(context)),
	                             get_constant_uint(itr->second.resource_kind == DXIL::ResourceKind::RawBuffer ? 4 : 1));
	push_instruction(inst);

	auto *value = get_extracted_composite_component(inst, 0);
	value_map[op.getDef()] = value;
	return true;
}

Instruction *ParseContext::build_descriptor_load(ir::SsaDef resource, ir::SsaDef index, bool nonuniform)
{
	auto itr = resource_map.find(resource);
	if (itr == resource_map.end())
		return nullptr;

	// Dummy pointer type which represents handles.
	// It's not directly used.
	auto *ptr_type = PointerType::get(Type::getVoidTy(context), 0);
	auto *bool_type = Type::getInt1Ty(context);

	Value *binding_offset;
	if (index)
	{
		auto *dynamic_offset = get_value(index);
		if (const auto *const_offset = llvm::dyn_cast<ConstantInt>(dynamic_offset))
		{
			binding_offset = get_constant_uint(const_offset->getUniqueInteger().getZExtValue() + itr->second.binding_offset);
		}
		else if (itr->second.binding_offset)
		{
			// SM 5.1 bindless.
			auto *add = context.construct<BinaryOperator>(dynamic_offset, get_constant_uint(itr->second.binding_offset),
			                                              BinaryOperator::BinaryOps::Add);
			push_instruction(add);
			binding_offset = add;
		}
		else
		{
			binding_offset = dynamic_offset;
		}
	}
	else
	{
		// DXIL is a bit silly and takes effective register index instead of offset into binding space.
		binding_offset = get_constant_uint(itr->second.binding_offset);
	}

	return build_dxil_call(DXIL::Op::CreateHandle, ptr_type, nullptr,
	                       get_constant_uint(uint32_t(itr->second.resource_type)),
	                       get_constant_uint(itr->second.index),
	                       binding_offset, ConstantInt::get(bool_type, nonuniform));
}

MDOperand *ParseContext::create_null_meta()
{
	return context.construct<MDOperand>(&module, MetadataKind::None);
}

MDNode *ParseContext::create_md_node(Vector<MDOperand *> ops)
{
	auto *node = context.construct<MDNode>(&module, std::move(ops));
	node->set_tween_id(++metadata_tween_id);
	module.add_unnamed_metadata(node);
	return node;
}

void ParseContext::create_named_md_node(const String &name, MDNode *node)
{
	Vector vops { node };
	auto *n = context.construct<NamedMDNode>(&module, name, std::move(vops));
	module.add_named_metadata(name, n);
}

MDNode *ParseContext::create_stage_io_meta()
{
	struct IOOp
	{
		const ir::Op *op;
		std::string semantic;
		uint32_t index;
	};
	std::vector<IOOp> io_inputs, io_outputs, io_patch;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eDclInput:
		case ir::OpCode::eDclInputBuiltIn:
			// For user-IO the general rule is that if it's an array it's a control point of some kind.
			// Multiple rows for stage IO is not used except for certain builtins.
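			// Patch-constant declarations are split off into their own list so they
			// land in the third signature blob rather than the per-vertex
			// input/output signatures (see the (inputs, outputs, patches) node
			// returned at the end of this function).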
			if (io_decl_is_patch(shader_stage, op))
				io_patch.push_back({ &op });
			else
				io_inputs.push_back({ &op });
			break;

		case ir::OpCode::eDclOutput:
		case ir::OpCode::eDclOutputBuiltIn:
			// For user-IO the general rule is that if it's an array it's a control point of some kind.
			// Multiple rows for stage IO is not used except for certain builtins.
			if (io_decl_is_patch(shader_stage, op))
				io_patch.push_back({ &op });
			else
				io_outputs.push_back({ &op });
			break;

		case ir::OpCode::eSemantic:
		{
			std::vector<IOOp> *sems[] = { &io_inputs, &io_outputs, &io_patch };
			for (auto *sem : sems)
			{
				for (auto &ioop : *sem)
				{
					if (ioop.op->getDef() == ir::SsaDef(op.getOperand(0)))
					{
						ioop.index = uint32_t(op.getOperand(1));
						ioop.semantic = op.getLiteralString(2);
					}
				}
			}
			break;
		}

		default:
			break;
		}
	}

	const struct
	{
		std::vector<IOOp> *ioop;
		MetadataMapping *mapping;
	} mappings[] = {
		{ &io_inputs, &inputs },
		{ &io_outputs, &outputs },
		{ &io_patch, &patches },
	};

	for (auto &mapping : mappings)
	{
		for (auto &io : *mapping.ioop)
		{
			DXIL::Semantic builtin = shader_stage == ir::ShaderStage::ePixel && io.op->getOpCode() == ir::OpCode::eDclOutput ?
			                         DXIL::Semantic::Target : DXIL::Semantic::User;
			uint32_t location, component;
			uint32_t stream = UINT32_MAX;
			bool is_input = io.op->getOpCode() == ir::OpCode::eDclInput || io.op->getOpCode() == ir::OpCode::eDclInputBuiltIn;
			bool is_user = io.op->getOpCode() == ir::OpCode::eDclInput || io.op->getOpCode() == ir::OpCode::eDclOutput;

			if (is_user)
			{
				location = uint32_t(io.op->getOperand(1));
				component = uint32_t(io.op->getOperand(2));
				if (!is_input && io.op->getOperandCount() == 4)
					stream = uint32_t(io.op->getOperand(3));
			}
			else
			{
				builtin = convert_semantic(ir::BuiltIn(io.op->getOperand(1)));
				location = UINT32_MAX;
				component = UINT32_MAX;
				if (!is_input && io.op->getOperandCount() == 3)
					stream = uint32_t(io.op->getOperand(2));

				if (builtin == DXIL::Semantic::Depth)
				{
					for_all_opcodes(builder, ir::OpCode::eSetPsDepthLessEqual, [&](const ir::Op &op) {
						builtin = DXIL::Semantic::DepthLessEqual;
						return false;
					});
					for_all_opcodes(builder, ir::OpCode::eSetPsDepthGreaterEqual, [&](const ir::Op &op) {
						builtin = DXIL::Semantic::DepthGreaterEqual;
						return false;
					});
				}

				if (io.op->getOpCode() == ir::OpCode::eDclInputBuiltIn)
				{
					// Some stage IO builtins are resolved through opcodes, not IO.
					auto op = convert_builtin_opcode(ir::BuiltIn(io.op->getOperand(1)));
					if (builtin == DXIL::Semantic::Coverage)
						op = DXIL::Op::Coverage;
					if (op != DXIL::Op::Count)
					{
						stage_io_map[io.op->getDef()] = { UINT32_MAX, op, false };
						continue;
					}
				}
			}

			auto interpolation = DXIL::InterpolationMode::Invalid;
			if (is_input)
				interpolation = convert_interpolation_mode(ir::InterpolationMode(io.op->getOperand(is_user ? 3 : 2)));

			bool is_geom = shader_stage == ir::ShaderStage::eGeometry;
			bool is_tess = shader_stage == ir::ShaderStage::eHull || shader_stage == ir::ShaderStage::eDomain;
			bool is_geom_tess_input = is_input && (is_geom || is_tess);
			bool is_hull_output = !is_input && shader_stage == ir::ShaderStage::eHull;

			// TessFactors is the exception since it's a patch array.
			bool need_axis = io.op->getType().isArrayType() &&
			                 (builtin != DXIL::Semantic::TessFactor && builtin != DXIL::Semantic::InsideTessFactor) &&
			                 (is_geom_tess_input || is_hull_output);

			auto comp = convert_component_mapping(io.op->getType(), need_axis);
			build_stage_io(*mapping.mapping, io.op->getDef(), String(io.semantic),
			               comp.type, builtin, io.index, interpolation,
			               comp.num_rows, comp.num_cols, location, component, stream, need_axis);
		}
	}

	return create_md_node(inputs.nodes.empty() ?
	                      create_null_meta() : create_md_node(inputs.nodes),
	                      outputs.nodes.empty() ? create_null_meta() : create_md_node(outputs.nodes),
	                      patches.nodes.empty() ? create_null_meta() : create_md_node(patches.nodes));
}

MDOperand *ParseContext::create_entry_point_meta(Function *patch_control_func)
{
	Vector<MDOperand *> flag_ops;
	uint64_t shader_flags = 0;

	flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::ShaderFlags)));
	if (shader_stage == ir::ShaderStage::ePixel)
	{
		for_all_opcodes(builder, ir::OpCode::eSetPsEarlyFragmentTest, [&](const ir::Op &) {
			shader_flags |= DXIL::ShaderFlagEarlyDepthStencil;
			return false;
		});
	}
	flag_ops.push_back(create_constant_uint64_meta(shader_flags));

	if (shader_stage == ir::ShaderStage::eCompute)
	{
		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::NumThreads)));
		const ir::Op *threads = nullptr;
		for_all_opcodes(builder, ir::OpCode::eSetCsWorkgroupSize, [&](const ir::Op &op) {
			threads = &op;
			return false;
		});

		if (!threads)
		{
			LOGE("Need to declare threads.\n");
			return nullptr;
		}

		flag_ops.push_back(create_md_node(create_constant_uint_meta(uint32_t(threads->getOperand(1))),
		                                  create_constant_uint_meta(uint32_t(threads->getOperand(2))),
		                                  create_constant_uint_meta(uint32_t(threads->getOperand(3)))));
	}
	else if (shader_stage == ir::ShaderStage::eGeometry)
	{
		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::GSState)));

		ir::PrimitiveType input_primitive = {};
		ir::PrimitiveType output_primitive = {};
		uint32_t stream_mask = 0;
		uint32_t instances = 0;
		uint32_t output_vertices = 0;

		for (auto &op : builder)
		{
			switch (op.getOpCode())
			{
			case ir::OpCode::eSetGsInstances:
				instances = uint32_t(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsOutputVertices:
				output_vertices = uint32_t(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsInputPrimitive:
				input_primitive = ir::PrimitiveType(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsOutputPrimitive:
				output_primitive = ir::PrimitiveType(op.getOperand(1));
				stream_mask = uint32_t(op.getOperand(2));
				break;

			default:
				break;
			}
		}

		flag_ops.push_back(create_md_node(
			create_constant_uint_meta(uint32_t(convert_input_primitive_type(input_primitive))),
			create_constant_uint_meta(output_vertices),
			create_constant_uint_meta(stream_mask),
			create_constant_uint_meta(uint32_t(convert_output_primitive_type(output_primitive))),
			create_constant_uint_meta(instances)));
	}
	else if (shader_stage == ir::ShaderStage::eHull)
	{
		ir::PrimitiveType prim = {};
		ir::PrimitiveType domain = {};
		ir::TessWindingOrder winding = {};
		ir::TessPartitioning partitioning = {};
		uint32_t input_control_points = 0;
		uint32_t output_control_points = 0;

		for (auto &op : builder)
		{
			switch (op.getOpCode())
			{
			case ir::OpCode::eSetTessControlPoints:
				input_control_points = uint32_t(op.getOperand(1));
				output_control_points = uint32_t(op.getOperand(2));
				break;

			case ir::OpCode::eSetTessPrimitive:
				prim = ir::PrimitiveType(op.getOperand(1));
				winding = ir::TessWindingOrder(op.getOperand(2));
				partitioning = ir::TessPartitioning(op.getOperand(3));
				break;

			case ir::OpCode::eSetTessDomain:
				domain = ir::PrimitiveType(op.getOperand(1));
				break;

			default:
				break;
			}
		}

		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::HSState)));
		flag_ops.push_back(create_md_node(patch_control_func ?
		                                  create_constant_meta(patch_control_func) : create_null_meta(),
		                                  create_constant_uint_meta(input_control_points),
		                                  create_constant_uint_meta(output_control_points),
		                                  create_constant_uint_meta(uint32_t(convert_hull_domain(domain))),
		                                  create_constant_uint_meta(uint32_t(convert_hull_partitioning(partitioning))),
		                                  create_constant_uint_meta(uint32_t(convert_hull_output_primitive(prim, winding)))));
	}
	else if (shader_stage == ir::ShaderStage::eDomain)
	{
		ir::PrimitiveType domain = {};
		for_all_opcodes(builder, ir::OpCode::eSetTessDomain, [&](const ir::Op &op) {
			domain = ir::PrimitiveType(op.getOperand(1));
			return false;
		});

		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::DSState)));
		flag_ops.push_back(create_md_node(create_constant_uint_meta(uint32_t(convert_hull_domain(domain))),
		                                  create_constant_uint_meta(32 /* somewhat irrelevant? */)));
	}

	return flag_ops.empty() ? create_null_meta() : create_md_node(std::move(flag_ops));
}

void ParseContext::set_function_attributes(Function *func)
{
	Vector<std::pair<String, String>> attrs;

	for_all_opcodes(builder, ir::OpCode::eSetFpMode, [&](const ir::Op &op) {
		auto round = ir::RoundMode(op.getOperand(1));
		auto denorm = ir::DenormMode(op.getOperand(2));
		const char *round_mode = nullptr;
		const char *denorm_mode = nullptr;

		switch (op.getType().getBaseType(0).getBaseType())
		{
		case ir::ScalarType::eF16:
			round_mode = "dxbc-fp16-round-mode";
			denorm_mode = "dxbc-fp16-denorm-mode";
			break;

		case ir::ScalarType::eF32:
			round_mode = "dxbc-fp32-round-mode";
			denorm_mode = "dxbc-fp32-denorm-mode";
			break;

		case ir::ScalarType::eF64:
			round_mode = "dxbc-fp64-round-mode";
			denorm_mode = "dxbc-fp64-denorm-mode";
			break;

		default:
			break;
		}

		if (round == ir::RoundMode::eZero)
			attrs.emplace_back(round_mode, "rtz");
		else if (round == ir::RoundMode::eNearestEven)
			attrs.emplace_back(round_mode, "rte");

		if (denorm == ir::DenormMode::eFlush)
			attrs.emplace_back(denorm_mode, "ftz");
		else if (denorm == ir::DenormMode::ePreserve)
			attrs.emplace_back(denorm_mode, "preserve");

		global_fp_flags |= op.getFlags();
		return true;
	});

	func->set_attributes(std::move(attrs));
}

bool ParseContext::emit_entry_point()
{
	const ir::Op *entry = nullptr;
	for_all_opcodes(builder, ir::OpCode::eEntryPoint, [&](const ir::Op &op) {
		entry = &op;
		return false;
	});

	if (!entry)
		return false;

	shader_stage = ir::ShaderStage(entry->getOperand(entry->getFirstLiteralOperandIndex()));
	Function *patch_control_func = nullptr;

	// Process patch constant func first so we can emit metadata.
	for (uint32_t i_plus1 = entry->getFirstLiteralOperandIndex(); i_plus1; i_plus1--)
	{
		auto i = i_plus1 - 1;
		auto ssa = ir::SsaDef(entry->getOperand(i));
		Type *type = convert_type(entry->getType());

		// Entry points don't take arguments.
		auto *func_type = context.construct<FunctionType>(context, type, Vector<Type *>{});
		auto *func = context.construct<Function>(func_type, ++tween_id, module);
		module.add_value_name(tween_id, i == 0 ? "main" : "patchMain");
		if (i == 1)
			patch_control_func = func;

		// We're not barbarians.
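		// (Presumably this is what carries the selection/loop merge info recorded via
		// set_selection_merge()/set_loop_merge() in emit_function_bodies() through to the
		// consumer, so the CFG does not have to be re-structurized from scratch.)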
		func->set_structured_control_flow();
		function_map[ssa] = func;

		if (i == 0)
		{
			create_named_md_node("dx.entryPoints",
			                     create_md_node(create_constant_meta(func), create_string_meta("main"),
			                                    create_stage_io_meta(), create_null_meta(),
			                                    create_entry_point_meta(patch_control_func)));
			set_function_attributes(func);
		}
	}

	auto *name = create_string_meta("dxbc-spirv");
	create_named_md_node("llvm.ident", create_md_node(name));

	const char *stage_str = shader_stage_to_meta(shader_stage);
	auto *stage_type = create_string_meta(stage_str);
	auto *major = create_constant_uint_meta(6);
	auto *minor = create_constant_uint_meta(0);
	create_named_md_node("dx.shaderModel", create_md_node(stage_type, major, minor));
	return true;
}

uint32_t ParseContext::build_texture_srv(uint32_t space, uint32_t index, uint32_t size,
                                         DXIL::ResourceKind kind, DXIL::ComponentType type)
{
	uint32_t ret = srvs.nodes.size();
	auto *srv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_null_meta(), // SRV sample count? We don't care about that.
		create_md_node(create_constant_uint_meta(0), create_constant_uint_meta(uint32_t(type))));
	srvs.nodes.push_back(srv);
	return ret;
}

uint32_t ParseContext::build_texture_uav(uint32_t space, uint32_t index, uint32_t size,
                                         DXIL::ResourceKind kind, DXIL::ComponentType type,
                                         bool coherent, bool rov)
{
	uint32_t ret = uavs.nodes.size();
	auto *uav = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_constant_uint_meta(coherent),
		create_constant_uint_meta(false),
		create_constant_uint_meta(rov),
		create_md_node(create_constant_uint_meta(0), create_constant_uint_meta(uint32_t(type))));
	uavs.nodes.push_back(uav);
	return ret;
}

uint32_t ParseContext::build_buffer_uav(uint32_t space, uint32_t index, uint32_t size,
                                        DXIL::ResourceKind kind, uint32_t stride,
                                        bool coherent, bool counter, bool rov)
{
	uint32_t ret = uavs.nodes.size();
	auto *uav = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_constant_uint_meta(coherent),
		create_constant_uint_meta(counter),
		create_constant_uint_meta(rov),
		create_md_node(create_constant_uint_meta(1), create_constant_uint_meta(stride)));
	uavs.nodes.push_back(uav);
	return ret;
}

uint32_t ParseContext::build_buffer_srv(uint32_t space, uint32_t index, uint32_t size,
                                        DXIL::ResourceKind kind, uint32_t stride)
{
	uint32_t ret = srvs.nodes.size();
	auto *srv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_null_meta(), // SRV sample count? We don't care about that.
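		// Trailing tag/value pair: tag 1 carries the structured element stride here,
		// while typed resources above use tag 0 with the component type instead.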
		create_md_node(create_constant_uint_meta(1), create_constant_uint_meta(stride)));
	srvs.nodes.push_back(srv);
	return ret;
}

uint32_t ParseContext::build_sampler(uint32_t space, uint32_t index, uint32_t size)
{
	uint32_t ret = samplers.nodes.size();
	auto *sampler = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size));
	samplers.nodes.push_back(sampler);
	return ret;
}

uint32_t ParseContext::build_cbv(uint32_t space, uint32_t index, uint32_t size, uint32_t cbv_size)
{
	uint32_t ret = cbvs.nodes.size();
	auto *cbv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(cbv_size));
	cbvs.nodes.push_back(cbv);
	return ret;
}

uint32_t ParseContext::build_stage_io(MetadataMapping &mapping, ir::SsaDef ssa, const String &name,
                                      DXIL::ComponentType type, DXIL::Semantic semantic, uint32_t semantic_index,
                                      DXIL::InterpolationMode interpolation, uint32_t rows, uint32_t cols,
                                      uint32_t start_row, uint32_t start_col, uint32_t stream, bool need_axis)
{
	uint32_t ret = mapping.nodes.size();
	stage_io_map[ssa] = { ret, DXIL::Op::Count, need_axis };

	MDOperand *stream_meta;
	if (stream != UINT32_MAX)
	{
		stream_meta = create_md_node(create_constant_uint_meta(uint32_t(DXIL::GSStageOutTags::Stream)),
		                             create_constant_uint_meta(stream));
	}
	else
		stream_meta = create_null_meta();

	auto *input = create_md_node(
		create_constant_uint_meta(ret),
		create_string_meta(name),
		create_constant_uint_meta(uint32_t(type)),
		create_constant_uint_meta(uint32_t(semantic)),
		semantic_index ? create_md_node(create_constant_uint_meta(semantic_index)) : create_null_meta(),
		create_constant_uint_meta(uint32_t(interpolation)),
		create_constant_uint_meta(rows),
		create_constant_uint_meta(cols),
		create_constant_uint_meta(start_row),
		create_constant_uint_meta(start_col),
		stream_meta);
	mapping.nodes.push_back(input);
	return ret;
}

bool ParseContext::emit_metadata()
{
	UnorderedSet<ir::SsaDef> uav_counters;
	for (auto &op : builder)
		if (op.getOpCode() == ir::OpCode::eDclUavCounter)
			uav_counters.insert(ir::SsaDef(op.getOperand(1)));

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eDclCbv:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			uint32_t cbv_size = op.getType().byteSize();
			uint32_t index = build_cbv(space, binding, count, cbv_size);
			resource_map[op.getDef()] = { DXIL::ResourceType::CBV, DXIL::ResourceKind::CBuffer, index, binding };
			break;
		}

		case ir::OpCode::eDclSampler:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			uint32_t index = build_sampler(space, binding, count);
			resource_map[op.getDef()] = { DXIL::ResourceType::Sampler, DXIL::ResourceKind::Sampler, index, binding };
			break;
		}

		case ir::OpCode::eDclSrv:
		case ir::OpCode::eDclUav:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			auto kind = convert_resource_kind(ir::ResourceKind(uint32_t(op.getOperand(4))));
			bool srv = op.getOpCode() == ir::OpCode::eDclSrv;
			uint32_t index;

			ir::UavFlag uav_flags = {};
			if (!srv)
				uav_flags =
				    ir::UavFlag(op.getOperand(5));

			if (kind == DXIL::ResourceKind::RawBuffer || kind == DXIL::ResourceKind::StructuredBuffer)
			{
				uint32_t stride = 0;
				if (kind == DXIL::ResourceKind::StructuredBuffer)
				{
					if (op.getType().getArrayDimensions() != 2)
					{
						LOGE("Expected 2 array dimensions.\n");
						return false;
					}
					stride = op.getType().getArraySize(0) * 4;
				}
				else
				{
					if (op.getType().getArrayDimensions() != 1)
					{
						LOGE("Expected 1 array dimension.\n");
						return false;
					}
				}

				if (op.getType().getArraySize(op.getType().getArrayDimensions() - 1) != 0)
				{
					LOGE("Last dimension must be unsized.\n");
					return false;
				}

				if (op.getType().getBaseType(0).byteSize() != 4)
				{
					LOGE("Expected 4 byte base type for raw buffers.\n");
					return false;
				}

				if (srv)
				{
					index = build_buffer_srv(space, binding, count, kind, stride);
				}
				else
				{
					index = build_buffer_uav(space, binding, count, kind, stride,
					                         bool(uav_flags & ir::UavFlag::eCoherent),
					                         uav_counters.count(op.getDef()) != 0,
					                         bool(uav_flags & ir::UavFlag::eRasterizerOrdered));
				}
			}
			else
			{
				auto mapping = convert_component_mapping(op.getType(), false);
				if (srv)
				{
					index = build_texture_srv(space, binding, count, kind, mapping.type);
				}
				else
				{
					index = build_texture_uav(space, binding, count, kind, mapping.type,
					                          bool(uav_flags & ir::UavFlag::eCoherent),
					                          bool(uav_flags & ir::UavFlag::eRasterizerOrdered));
				}
			}

			resource_map[op.getDef()] = { srv ? DXIL::ResourceType::SRV : DXIL::ResourceType::UAV, kind, index, binding };
			break;
		}

		default:
			break;
		}
	}

	create_named_md_node("dx.resources",
	                     create_md_node(srvs.nodes.empty() ? create_null_meta() : create_md_node(srvs.nodes),
	                                    uavs.nodes.empty() ? create_null_meta() : create_md_node(uavs.nodes),
	                                    cbvs.nodes.empty() ? create_null_meta() : create_md_node(cbvs.nodes),
	                                    samplers.nodes.empty() ? create_null_meta() : create_md_node(samplers.nodes)));
	return true;
}

bool ParseContext::emit_function_bodies()
{
	Vector<BasicBlock *> bbs;
	Function *func = nullptr;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eEntryPoint:
		case ir::OpCode::eDebugName:
		case ir::OpCode::eDebugMemberName:
			break;

		case ir::OpCode::eDclSpecConstant:
		case ir::OpCode::eDclPushData:
		case ir::OpCode::eDclTmp:
		case ir::OpCode::eScopedIf:
		case ir::OpCode::eScopedElse:
		case ir::OpCode::eScopedEndIf:
		case ir::OpCode::eScopedLoop:
		case ir::OpCode::eScopedLoopBreak:
		case ir::OpCode::eScopedLoopContinue:
		case ir::OpCode::eScopedEndLoop:
		case ir::OpCode::eScopedSwitch:
		case ir::OpCode::eScopedSwitchCase:
		case ir::OpCode::eScopedSwitchDefault:
		case ir::OpCode::eScopedSwitchBreak:
		case ir::OpCode::eScopedEndSwitch:
		case ir::OpCode::eConsumeAs:
		case ir::OpCode::eTmpLoad:
		case ir::OpCode::eTmpStore:
		case ir::OpCode::ePushDataLoad:
		case ir::OpCode::eMemoryLoad:
		case ir::OpCode::eMemoryStore:
		case ir::OpCode::eMemoryAtomic:
		case ir::OpCode::ePointer:
		case ir::OpCode::eFMulLegacy:
		case ir::OpCode::eFMadLegacy:
		case ir::OpCode::eFDot:
		case ir::OpCode::eFDotLegacy:
		case ir::OpCode::eFPowLegacy:
		case ir::OpCode::eUMSad:
		case ir::OpCode::eDrain:
			LOGE("Opcode %u should not appear in final IR at this point.\n", unsigned(op.getOpCode()));
			return false;

		case ir::OpCode::eDclXfb:
		case ir::OpCode::eRovScopedLockBegin:
		case ir::OpCode::eRovScopedLockEnd:
			// Should not appear, but we can just ignore it since it has no semantic impact at this stage.
			// ROV is done automatically by dxil-spirv path already, so ignore that here.
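			// (The ROV requirement is presumably rediscovered from the rov flag carried on
			// the UAV declaration metadata via build_texture_uav()/build_buffer_uav(), so
			// dropping the explicit lock markers loses nothing.)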
bool ParseContext::emit_function_bodies()
{
	Vector<BasicBlock *> bbs;
	Function *func = nullptr;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eEntryPoint:
		case ir::OpCode::eDebugName:
		case ir::OpCode::eDebugMemberName:
			break;

		case ir::OpCode::eDclSpecConstant:
		case ir::OpCode::eDclPushData:
		case ir::OpCode::eDclTmp:
		case ir::OpCode::eScopedIf:
		case ir::OpCode::eScopedElse:
		case ir::OpCode::eScopedEndIf:
		case ir::OpCode::eScopedLoop:
		case ir::OpCode::eScopedLoopBreak:
		case ir::OpCode::eScopedLoopContinue:
		case ir::OpCode::eScopedEndLoop:
		case ir::OpCode::eScopedSwitch:
		case ir::OpCode::eScopedSwitchCase:
		case ir::OpCode::eScopedSwitchDefault:
		case ir::OpCode::eScopedSwitchBreak:
		case ir::OpCode::eScopedEndSwitch:
		case ir::OpCode::eConsumeAs:
		case ir::OpCode::eTmpLoad:
		case ir::OpCode::eTmpStore:
		case ir::OpCode::ePushDataLoad:
		case ir::OpCode::eMemoryLoad:
		case ir::OpCode::eMemoryStore:
		case ir::OpCode::eMemoryAtomic:
		case ir::OpCode::ePointer:
		case ir::OpCode::eFMulLegacy:
		case ir::OpCode::eFMadLegacy:
		case ir::OpCode::eFDot:
		case ir::OpCode::eFDotLegacy:
		case ir::OpCode::eFPowLegacy:
		case ir::OpCode::eUMSad:
		case ir::OpCode::eDrain:
			LOGE("Opcode %u should not appear in final IR at this point.\n", unsigned(op.getOpCode()));
			return false;

		case ir::OpCode::eDclXfb:
		case ir::OpCode::eRovScopedLockBegin:
		case ir::OpCode::eRovScopedLockEnd:
			// Should not appear, but we can just ignore it since it has no semantic impact at this stage.
			// ROV is done automatically by dxil-spirv path already, so ignore that here.
			break;

		case ir::OpCode::eDclInput:
		case ir::OpCode::eDclInputBuiltIn:
		case ir::OpCode::eDclOutput:
		case ir::OpCode::eDclOutputBuiltIn:
		case ir::OpCode::eDclSrv:
		case ir::OpCode::eDclUav:
		case ir::OpCode::eDclUavCounter:
		case ir::OpCode::eDclCbv:
		case ir::OpCode::eDclSampler:
		case ir::OpCode::eSemantic:
		case ir::OpCode::eSetCsWorkgroupSize:
		case ir::OpCode::eSetPsDepthGreaterEqual:
		case ir::OpCode::eSetPsDepthLessEqual:
		case ir::OpCode::eSetGsInputPrimitive:
		case ir::OpCode::eSetGsOutputPrimitive:
		case ir::OpCode::eSetGsOutputVertices:
		case ir::OpCode::eSetGsInstances:
		case ir::OpCode::eSetTessControlPoints:
		case ir::OpCode::eSetTessDomain:
		case ir::OpCode::eSetTessPrimitive:
		case ir::OpCode::eSetFpMode:
		case ir::OpCode::eSetPsEarlyFragmentTest:
			break;

		case ir::OpCode::eConstant:
			if (!emit_constant(op))
				return false;
			break;

		case ir::OpCode::eUndef:
			value_map[op.getDef()] = UndefValue::get(convert_type(op.getType()));
			break;

		// Functions
		case ir::OpCode::eDclParam:
			param_types[op.getDef()] = convert_type(op.getType());
			break;

		case ir::OpCode::eFunction:
		{
			auto itr = function_map.find(op.getDef());
			if (itr == function_map.end())
			{
				Type *type = convert_type(op.getType());
				Vector<Type *> types;
				types.reserve(op.getOperandCount());
				params.clear();
				for (unsigned i = 0; i < op.getOperandCount(); i++)
				{
					auto *param_type = param_types[ir::SsaDef(op.getOperand(i))];
					types.push_back(param_type);
					params.emplace_back(ir::SsaDef(op.getOperand(i)), param_type);
				}

				auto *func_type = context.construct<FunctionType>(context, type, types);
				func = context.construct<Function>(func_type, ++tween_id, module);
				for (unsigned i = 0; i < op.getOperandCount(); i++)
					func->add_argument(context.construct<Argument>(types[i], i));
				func->set_structured_control_flow();
				function_map[op.getDef()] = func;
			}
			else
			{
				func = itr->second;
			}
			break;
		}

		case ir::OpCode::eFunctionEnd:
			if (!func)
			{
				LOGE("Cannot end function without a function.\n");
				return false;
			}
			func->set_basic_blocks(std::move(bbs));
			module.add_function_implementation(func);
			bbs = {};
			break;

		case ir::OpCode::eParamLoad:
		{
			if (!func)
			{
				LOGE("Cannot get parameter without a function.\n");
				return false;
			}
			auto &func_op = builder.getOp(ir::SsaDef(op.getOperand(0)));
			auto param = ir::SsaDef(op.getOperand(1));
			auto arg_iter = func->arg_begin();
			for (uint32_t i = 0; i < func_op.getOperandCount(); i++, ++arg_iter)
				if (ir::SsaDef(func_op.getOperand(i)) == param)
					break;
			if (arg_iter == func->arg_end())
				return false;
			auto &arg = *arg_iter;
			value_map[op.getDef()] = const_cast<Argument *>(&arg);
			break;
		}

		// Basic Blocks
		case ir::OpCode::eLabel:
		{
			auto *bb = get_basic_block(op.getDef());
			current_bb = bb;
			bbs.push_back(bb);

			switch (ir::Construct(op.getOperand(op.getFirstLiteralOperandIndex())))
			{
			case ir::Construct::eStructuredSelection:
				bb->set_selection_merge(get_basic_block(ir::SsaDef(op.getOperand(0))));
				break;

			case ir::Construct::eStructuredLoop:
				bb->set_loop_merge(get_basic_block(ir::SsaDef(op.getOperand(0))),
				                   get_basic_block(ir::SsaDef(op.getOperand(1))));
				break;

			default:
				break;
			}
			break;
		}
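		// Editor's note: the eLabel case above is where dxbc-spirv's structured
		// control flow survives into this IR. Assuming the operand order used above:
		//   eStructuredSelection: operand 0 = merge block    -> set_selection_merge()
		//   eStructuredLoop:      operand 0 = merge block,
		//                         operand 1 = continue block -> set_loop_merge()
		// This matches the merge/continue shape SPIR-V expects from
		// OpSelectionMerge / OpLoopMerge.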
		case ir::OpCode::ePhi:
		{
			// We might not have emitted all inputs yet. Defer that to a fixup pass later.
			auto *phi = context.construct<PHINode>(convert_type(op.getType()), op.getOperandCount() / 2);
			push_instruction(phi, op.getDef());
			break;
		}

		case ir::OpCode::eReturn:
			if (!current_bb)
				return false;
			if (op.getOperand(0))
				push_instruction(context.construct<ReturnInst>(get_value(op.getOperand(0))));
			else
				push_instruction(context.construct<ReturnInst>(nullptr));
			current_bb = nullptr;
			break;

		case ir::OpCode::eBranch:
		{
			if (!current_bb)
				return false;
			auto *target = get_basic_block(ir::SsaDef(op.getOperand(0)));
			current_bb->add_successor(target);
			push_instruction(context.construct<BranchInst>(target));
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eBranchConditional:
		{
			if (!current_bb)
				return false;
			auto *value = get_value(op.getOperand(0));
			auto *true_path = get_basic_block(ir::SsaDef(op.getOperand(1)));
			auto *false_path = get_basic_block(ir::SsaDef(op.getOperand(2)));
			current_bb->add_successor(true_path);
			current_bb->add_successor(false_path);
			push_instruction(context.construct<BranchInst>(true_path, false_path, value));
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eSwitch:
		{
			if (!current_bb)
				return false;
			auto *default_block = get_basic_block(ir::SsaDef(op.getOperand(1)));
			current_bb->add_successor(default_block);
			unsigned num_cases = (op.getOperandCount() - 2) / 2;
			auto *inst = context.construct<SwitchInst>(get_value(op.getOperand(0)), default_block, num_cases);
			for (unsigned i = 0; i < num_cases; i++)
			{
				auto *value = get_value(op.getOperand(2 * i + 2));
				auto *case_label = get_basic_block(ir::SsaDef(op.getOperand(2 * i + 3)));
				current_bb->add_successor(case_label);
				inst->addCase(value, case_label);
			}
			push_instruction(inst);
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eUnreachable:
			if (!current_bb)
				return false;
			push_instruction(context.construct<UnreachableInst>());
			current_bb = nullptr;
			break;

		case ir::OpCode::eDclScratch:
		case ir::OpCode::eDclLds:
		{
			auto *type = convert_type(op.getType());
			auto *value = context.construct<GlobalVariable>(
				PointerType::get(type, uint32_t(
					op.getOpCode() == ir::OpCode::eDclLds ?
					DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread)),
				GlobalVariable::LinkageTypes::InternalLinkage, false);
			value_map[op.getDef()] = value;
			module.add_global_variable(value);
			break;
		}

		// Opcodes
		default:
			if (!current_bb)
			{
				LOGE("No BB to insert instructions into.\n");
				return false;
			}
			if (!push_instruction(op))
				return false;
			break;
		}
	}

	// Resolve PHI incoming values since we have value-defs for them now.
	for (auto &op : builder)
	{
		if (op.getOpCode() == ir::OpCode::ePhi)
		{
			auto *phi = cast<PHINode>(get_value(op.getDef()));
			for (uint32_t i = 0; i < op.getOperandCount(); i += 2)
				phi->add_incoming(get_value(op.getOperand(i + 1)), get_basic_block(ir::SsaDef(op.getOperand(i))));
		}
	}

	return true;
}
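// Editor's note: PHI resolution above is a two-pass scheme because an incoming
// value may be defined by an op that appears later in the instruction stream.
// A minimal illustration of the forward reference being resolved:
//   %A: phi { (%B, %x), (%C, %y) }   ; %y is not in value_map yet
//   %C: %y = ...                     ; defined after the phi
// The main loop only allocates the PHINode with the right operand count;
// the fixup loop then calls add_incoming() once value_map holds a Value for
// every SSA def.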
Value *ParseContext::get_value(const ir::Operand &op) const
{
	return get_value(ir::SsaDef(op));
}

Value *ParseContext::get_value(const ir::SsaDef &op) const
{
	auto itr = value_map.find(op);
	return itr == value_map.end() ? nullptr : itr->second;
}

BasicBlock *ParseContext::get_basic_block(ir::SsaDef ssa)
{
	auto &bb = bb_map[ssa];
	if (!bb)
		bb = context.construct<BasicBlock>(context);
	return bb;
}

ConstantInt *ParseContext::get_constant_uint(uint32_t value)
{
	return ConstantInt::get(Type::getInt32Ty(context), value);
}

ConstantAsMetadata *ParseContext::create_constant_uint_meta(uint32_t value)
{
	return create_constant_meta(get_constant_uint(value));
}

ConstantAsMetadata *ParseContext::create_constant_uint64_meta(uint32_t value)
{
	return create_constant_meta(ConstantInt::get(Type::getInt64Ty(context), value));
}

ConstantAsMetadata *ParseContext::create_constant_meta(Constant *c)
{
	return context.construct<ConstantAsMetadata>(&module, c);
}

MDString *ParseContext::create_string_meta(const String &str)
{
	return context.construct<MDString>(&module, str);
}

// Parses the highly simplified and SSA-ified IR coming from dxbc-spirv.
Module *parseDXBCIR(LLVMContext &context, ir::Builder &builder)
{
	auto *module = context.construct<Module>(context);
	ParseContext ctx(context, builder, *module);

	if (!ctx.emit_entry_point())
		return nullptr;
	if (!ctx.emit_metadata())
		return nullptr;
	if (!ctx.emit_function_bodies())
		return nullptr;

	return module;
}

Module *parseDXBCBinary(LLVMContext &context, const void *data, size_t size)
{
	ScopedLogger logger;

	dxbc::Converter::Options convertOptions = {};
	convertOptions.includeDebugNames = false;
	convertOptions.boundCheckShaderIo = true;

	ir::CompileOptions options = {};
	options.scratchOptions.enableBoundChecking = true;
	options.scratchOptions.resolveCbvCopy = false;
	options.scratchOptions.unpackConstantIndexedArrays = true;
	options.scratchOptions.unpackSmallArrays = true;
	options.arithmeticOptions.lowerDot = true;
	options.arithmeticOptions.lowerSinCos = false;
	options.arithmeticOptions.lowerMsad = true;
	options.arithmeticOptions.lowerF32toF16 = true;
	options.arithmeticOptions.lowerConvertFtoI = false;
	options.arithmeticOptions.lowerGsVertexCountIn = true;
	options.arithmeticOptions.hasNvUnsignedItoFBug = true;
	options.min16Options.enableFloat16 = true;
	options.min16Options.enableInt16 = true;
	options.resourceOptions.allowSubDwordScratchAndLds = false;
	options.resourceOptions.flattenLds = true;
	options.resourceOptions.flattenScratch = true;
	options.resourceOptions.structuredCbv = false;
	options.resourceOptions.structuredSrvUav = false;
	options.bufferOptions.useTypedForRaw = false;
	options.bufferOptions.useTypedForStructured = false;
	options.bufferOptions.useTypedForSparseFeedback = true;
	options.bufferOptions.useRawForTypedAtomic = false;
	options.scalarizeOptions.subDwordVectors = true;
	options.syncOptions.allowWorkgroupCoherence = false;
	options.syncOptions.insertRovLocks = false;
	options.syncOptions.insertLdsBarriers = false;
	options.syncOptions.insertUavBarriers = false;
	options.derivativeOptions.hoistNontrivialDerivativeOps = true;
	options.derivativeOptions.hoistNontrivialImplicitLodOps = false;
	options.derivativeOptions.hoistDescriptorLoads = false;
	options.cseOptions.relocateDescriptorLoad = false;
	options.descriptorIndexing.optimizeDescriptorIndexing = false;

	auto builder = dxbc::compileShaderToLegalizedIr(data, size, convertOptions, options);
	if (!builder)
		return nullptr;

	return parseDXBCIR(context, *builder);
}
}
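// Editor's note: a minimal usage sketch for the entry point above (variable
// names are hypothetical, and the enclosing namespace is assumed to be LLVMBC):
//
//   LLVMBC::LLVMContext context;
//   LLVMBC::Module *module =
//       LLVMBC::parseDXBCBinary(context, dxbc_data, dxbc_size);
//   if (!module)
//       LOGE("Failed to parse DXBC shader.\n");
//
// Since everything is allocated via context.construct(), the returned module
// presumably lives in the context's arena and must not outlive it.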
================================================
FILE: bc/type.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "type.hpp"
#include "cast.hpp"
#include "context.hpp"
#include <assert.h>

namespace LLVMBC
{
PointerType::PointerType(Type *type, uint32_t addr_space)
    : Type(type->getContext(), TypeID::PointerTyID)
    , contained_type(type)
{
	address_space = addr_space;
}

PointerType *PointerType::get(Type *pointee, unsigned addr_space)
{
	auto &context = pointee->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::PointerTyID)
		{
			auto *pointer_type = cast<PointerType>(type);
			if (pointer_type->getAddressSpace() == addr_space && pointer_type->getElementType() == pointee)
				return pointer_type;
		}
	}

	auto *type = context.construct<PointerType>(pointee, addr_space);
	cache.push_back(type);
	return type;
}

unsigned Type::getAddressSpace() const
{
	return address_space;
}

Type *PointerType::getElementType() const
{
	return contained_type;
}

ArrayType::ArrayType(Type *type, uint64_t elements_)
    : Type(type->getContext(), TypeID::ArrayTyID)
    , contained_type(type)
    , elements(elements_)
{
}

ArrayType *ArrayType::get(Type *element, uint64_t size)
{
	auto &context = element->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::ArrayTyID)
		{
			auto *array_type = cast<ArrayType>(type);
			if (array_type->getArrayNumElements() == size && array_type->getArrayElementType() == element)
				return array_type;
		}
	}

	auto *type = context.construct<ArrayType>(element, size);
	cache.push_back(type);
	return type;
}
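// Editor's note: get() uniques types by linearly scanning the context's type
// cache, so type equality elsewhere in the codebase reduces to pointer
// comparison. For example (ctx being any live LLVMContext):
//   Type *a = PointerType::get(Type::getInt32Ty(ctx), 0);
//   Type *b = PointerType::get(Type::getInt32Ty(ctx), 0);
//   assert(a == b); // same cached instance
// The O(n) scan is acceptable since a shader module only creates a handful of
// distinct types.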
VectorType::VectorType(LLVMBC::LLVMContext &context, unsigned vector_size_, LLVMBC::Type *type)
    : Type(context, TypeID::VectorTyID)
    , element_type(type)
    , vector_size(vector_size_)
{
}

unsigned VectorType::getVectorSize() const
{
	return vector_size;
}

Type *VectorType::getElementType() const
{
	return element_type;
}

VectorType *VectorType::get(unsigned vector_size, Type *element)
{
	auto &context = element->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::VectorTyID)
		{
			auto *vector_type = cast<VectorType>(type);
			if (vector_type->getVectorSize() == vector_size && vector_type->getElementType() == element)
				return vector_type;
		}
	}

	auto *type = context.construct<VectorType>(context, vector_size, element);
	cache.push_back(type);
	return type;
}

uint64_t Type::getArrayNumElements() const
{
	assert(type_id == TypeID::ArrayTyID);
	return cast<ArrayType>(this)->elements;
}

unsigned Type::getVectorNumElements() const
{
	assert(type_id == TypeID::VectorTyID);
	return cast<VectorType>(this)->getVectorSize();
}

unsigned Type::getPointerAddressSpace() const
{
	assert(type_id == TypeID::PointerTyID);
	return cast<PointerType>(this)->getAddressSpace();
}

Type *Type::getArrayElementType() const
{
	assert(type_id == TypeID::ArrayTyID);
	return cast<ArrayType>(this)->contained_type;
}

Type *Type::getStructElementType(unsigned index) const
{
	assert(type_id == TypeID::StructTyID);
	return cast<StructType>(this)->getElementType(index);
}

unsigned Type::getStructNumElements() const
{
	assert(type_id == TypeID::StructTyID);
	return cast<StructType>(this)->getNumElements();
}

unsigned Type::getIntegerBitWidth() const
{
	assert(type_id == TypeID::IntegerTyID);
	return cast<IntegerType>(this)->getBitWidth();
}

Type *Type::getPointerElementType() const
{
	assert(type_id == TypeID::PointerTyID);
	return cast<PointerType>(this)->getElementType();
}

StructType::StructType(LLVMContext &context, Vector<Type *> member_types_)
    : Type(context, TypeID::StructTyID)
    , member_types(std::move(member_types_))
{
}

unsigned StructType::getNumElements() const
{
	return member_types.size();
}

Type *StructType::getElementType(unsigned N) const
{
	assert(N < member_types.size());
	return member_types[N];
}

StructType *StructType::get(LLVMContext &context, Vector<Type *> member_types)
{
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::StructTyID)
		{
			auto *struct_type = cast<StructType>(type);
			if (struct_type->getNumElements() == member_types.size())
			{
				bool equal = true;
				unsigned count = member_types.size();
				for (unsigned i = 0; i < count; i++)
				{
					if (member_types[i] != struct_type->getElementType(i))
					{
						equal = false;
						break;
					}
				}

				if (equal)
					return struct_type;
			}
		}
	}

	auto *type = context.construct<StructType>(context, std::move(member_types));
	cache.push_back(type);
	return type;
}

FunctionType::FunctionType(LLVMContext &context, Type *return_type_, Vector<Type *> argument_types_)
    : Type(context, TypeID::FunctionTyID)
    , return_type(return_type_)
    , argument_types(std::move(argument_types_))
{
}

unsigned FunctionType::getNumParams() const
{
	return unsigned(argument_types.size());
}

Type *FunctionType::getParamType(unsigned index) const
{
	assert(index < argument_types.size());
	return argument_types[index];
}

Type *FunctionType::getReturnType() const
{
	return return_type;
}

IntegerType::IntegerType(LLVMContext &context, uint32_t width_)
    : Type(context, TypeID::IntegerTyID)
    , width(width_)
{
}

uint32_t IntegerType::getBitWidth() const
{
	return width;
}

Type::Type(LLVMContext &context_, TypeID type_id_)
    : context(context_)
    , type_id(type_id_)
{
}

Type::TypeID Type::getTypeID() const
{
	return type_id;
}

Type *Type::getIntTy(LLVMContext &context, uint32_t width)
{
	auto &cache = context.get_type_cache();
	for (auto *type : cache)
		if (type->getTypeID() == TypeID::IntegerTyID && cast<IntegerType>(type)->getBitWidth() == width)
			return type;

	auto *type = context.construct<IntegerType>(context, width);
	cache.push_back(type);
	return type;
}

Type *Type::getTy(LLVMContext &context, TypeID id)
{
	auto &cache = context.get_type_cache();
	for (auto *type : cache)
		if (type->getTypeID() == id)
			return type;

	auto *type = context.construct<Type>(context, id);
	cache.push_back(type);
	return type;
}

Type *Type::getVoidTy(LLVMContext &context)
{
	return getTy(context, TypeID::VoidTyID);
}

Type *Type::getHalfTy(LLVMContext &context)
{
	return getTy(context, TypeID::HalfTyID);
}

Type *Type::getFloatTy(LLVMContext &context)
{
	return getTy(context, TypeID::FloatTyID);
}

Type *Type::getDoubleTy(LLVMContext &context)
{
	return getTy(context, TypeID::DoubleTyID);
}

Type *Type::getLabelTy(LLVMContext &context)
{
	return getTy(context, TypeID::LabelTyID);
}

Type *Type::getOpaqueTy(LLVMBC::LLVMContext &context)
{
	return getTy(context, TypeID::OpaqueTyID);
}
Type *Type::getMetadataTy(LLVMContext &context)
{
	return getTy(context, TypeID::MetadataTyID);
}

Type *Type::getInt1Ty(LLVMContext &context)
{
	return getIntTy(context, 1);
}

Type *Type::getInt8Ty(LLVMContext &context)
{
	return getIntTy(context, 8);
}

Type *Type::getInt16Ty(LLVMContext &context)
{
	return getIntTy(context, 16);
}

Type *Type::getInt32Ty(LLVMContext &context)
{
	return getIntTy(context, 32);
}

Type *Type::getInt64Ty(LLVMContext &context)
{
	return getIntTy(context, 64);
}

bool Type::isIntegerTy() const
{
	return type_id == TypeID::IntegerTyID;
}

bool Type::isFloatingPointTy() const
{
	return type_id == TypeID::HalfTyID || type_id == TypeID::FloatTyID || type_id == TypeID::DoubleTyID;
}

LLVMContext &Type::getContext()
{
	return context;
}
} // namespace LLVMBC


================================================
FILE: bc/type.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #pragma once #include "data_structures.hpp" #include namespace LLVMBC { class LLVMContext; class Type { public: enum class TypeID { Unknown, VoidTyID, HalfTyID, FloatTyID, DoubleTyID, IntegerTyID, PointerTyID, ArrayTyID, StructTyID, FunctionTyID, LabelTyID, VectorTyID, MetadataTyID, OpaqueTyID }; Type(LLVMContext &context, TypeID type_id); LLVMContext &getContext(); TypeID getTypeID() const; static Type *getVoidTy(LLVMContext &context); static Type *getHalfTy(LLVMContext &context); static Type *getFloatTy(LLVMContext &context); static Type *getDoubleTy(LLVMContext &context); static Type *getInt1Ty(LLVMContext &context); static Type *getInt8Ty(LLVMContext &context); static Type *getInt16Ty(LLVMContext &context); static Type *getInt32Ty(LLVMContext &context); static Type *getInt64Ty(LLVMContext &context); static Type *getIntTy(LLVMContext &context, uint32_t width); static Type *getLabelTy(LLVMContext &context); static Type *getMetadataTy(LLVMContext &context); static Type *getOpaqueTy(LLVMContext &context); bool isIntegerTy() const; bool isFloatingPointTy() const; uint64_t getArrayNumElements() const; Type *getArrayElementType() const; Type *getPointerElementType() const; Type *getStructElementType(unsigned index) const; unsigned getStructNumElements() const; unsigned getIntegerBitWidth() const; unsigned getAddressSpace() const; unsigned getVectorNumElements() const; unsigned getPointerAddressSpace() const; protected: LLVMContext &context; TypeID type_id; static Type *getTy(LLVMContext &context, TypeID id); unsigned address_space = 0; }; class PointerType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::PointerTyID; } PointerType(Type *type, unsigned addr_space); static PointerType *get(Type *pointee, unsigned addr_space); Type *getElementType() const; private: Type *contained_type = nullptr; }; class ArrayType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::ArrayTyID; } ArrayType(Type *type, uint64_t elements); static ArrayType *get(Type *element, uint64_t size); private: friend class Type; Type *contained_type = nullptr; uint64_t elements = 0; }; class IntegerType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::IntegerTyID; } IntegerType(LLVMContext &context, uint32_t width); uint32_t getBitWidth() const; private: uint32_t width = 0; }; class StructType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::StructTyID; } StructType(LLVMContext &context, Vector member_types); static StructType *get(LLVMContext &context, Vector member_types); unsigned getNumElements() const; Type *getElementType(unsigned N) const; private: Vector member_types; }; class VectorType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::VectorTyID; } VectorType(LLVMContext &context, unsigned vector_size, Type *type); static VectorType *get(unsigned vector_size, Type *type); unsigned getVectorSize() const; Type *getElementType() const; private: Type *element_type; unsigned vector_size; }; class FunctionType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::FunctionTyID; } FunctionType(LLVMContext &context, Type *return_type, Vector argument_types); unsigned getNumParams() const; Type *getParamType(unsigned index) const; Type *getReturnType() const; private: Type *return_type = nullptr; Vector argument_types; }; } // namespace LLVMBC ================================================ FILE: bc/value.cpp 
================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "value.hpp" #include "cast.hpp" #include "context.hpp" #include "instruction.hpp" #include "logging.hpp" #include "type.hpp" #include namespace LLVMBC { Value::Value(Type *type_, ValueKind kind_) : type(type_) , kind(kind_) { } void Value::set_tween_id(uint64_t id) { tween_id = id; } uint64_t Value::get_tween_id() const { return tween_id; } Type *Value::getType() const { return type; } ValueKind Value::get_value_kind() const { return kind; } Argument::Argument(Type *type, unsigned argument_number_) : Value(type, ValueKind::Argument), argument_number(argument_number_) { } unsigned Argument::getArgNo() const { return argument_number; } bool Constant::is_base_of_value_kind(ValueKind kind) { switch (kind) { case ValueKind::ConstantFP: case ValueKind::ConstantInt: case ValueKind::ConstantDataArray: case ValueKind::ConstantDataVector: case ValueKind::ConstantAggregate: case ValueKind::ConstantAggregateZero: case ValueKind::ConstantBase: case ValueKind::Undef: case ValueKind::Function: case ValueKind::Global: case ValueKind::ConstantExpr: return true; default: return false; } } Constant::Constant(Type *type, ValueKind kind) : Value(type, kind) { } ConstantInt *ConstantInt::get(Type *type, uint64_t value) { auto &context = type->getContext(); return context.construct(type, value); } const APInt &Constant::getUniqueInteger() const { return apint; } void Constant::set_integer(const APInt &apint_) { apint = apint_; } void Constant::set_float(const APFloat &apfloat_) { apfloat = apfloat_; } APInt::APInt(Type *type_, uint64_t value_) : type(type_) , value(value_) { } APFloat::APFloat(Type *type_, uint64_t value_) : type(type_) , value(value_) { } int64_t APInt::getSExtValue() const { auto width = cast(type)->getBitWidth(); if (width == 64) return int64_t(value); auto mask = (1ull << width) - 1; bool sign_bit = ((value >> (width - 1)) & 1) != 0; uint64_t extended = sign_bit ? 
~mask : 0ull; return int64_t((value & mask) | extended); } uint64_t APInt::getZExtValue() const { auto width = cast(type)->getBitWidth(); if (width == 64) return value; auto mask = (1ull << width) - 1u; return value & mask; } uint64_t APInt::get_raw_value() const { return value; } ConstantFP *ConstantFP::get(Type *type, uint64_t value) { auto &context = type->getContext(); return context.construct(type, value); } ConstantInt::ConstantInt(Type *type, uint64_t value) : Constant(type, ValueKind::ConstantInt) { set_integer(APInt(type, value)); } ConstantFP::ConstantFP(Type *type, uint64_t value) : Constant(type, ValueKind::ConstantFP) { set_float(APFloat(type, value)); } const APFloat &Constant::getValueAPF() const { return apfloat; } float APFloat::convertToFloat() const { switch (type->getTypeID()) { case Type::TypeID::FloatTyID: { float f; auto u = uint32_t(value); static_assert(sizeof(f) == sizeof(u), "Float is not 32-bit."); memcpy(&f, &u, sizeof(float)); return f; } case Type::TypeID::DoubleTyID: { double f; static_assert(sizeof(f) == sizeof(value), "Double is not 64-bit."); memcpy(&f, &value, sizeof(double)); return float(f); } default: LOGE("Unknown FP type in APFloat::convertToFloat().\n"); return 0.0f; } } APInt APFloat::bitcastToAPInt() const { Type *int_type = nullptr; switch (type->getTypeID()) { case Type::TypeID::HalfTyID: int_type = Type::getInt16Ty(type->getContext()); break; case Type::TypeID::FloatTyID: int_type = Type::getInt32Ty(type->getContext()); break; case Type::TypeID::DoubleTyID: int_type = Type::getInt64Ty(type->getContext()); break; default: break; } return { int_type, value }; } double APFloat::convertToDouble() const { switch (type->getTypeID()) { case Type::TypeID::FloatTyID: { float f; auto u = uint32_t(value); static_assert(sizeof(f) == sizeof(u), "Float is not 32-bit."); memcpy(&f, &u, sizeof(float)); return double(f); } case Type::TypeID::DoubleTyID: { double f; static_assert(sizeof(f) == sizeof(value), "Double is not 64-bit."); memcpy(&f, &value, sizeof(double)); return f; } default: LOGE("Unknown FP type in APFloat::convertToDouble().\n"); return 0.0f; } } UndefValue::UndefValue(Type *type) : Constant(type, ValueKind::Undef) { } UndefValue *UndefValue::get(Type *type) { auto &context = type->getContext(); return context.construct(type); } ConstantAggregateZero::ConstantAggregateZero(Type *type) : Constant(type, ValueKind::ConstantAggregateZero) { } ConstantPointerNull::ConstantPointerNull(Type *type) : Constant(type, ValueKind::ConstantPointerNull) { } ConstantDataArray::ConstantDataArray(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantDataArray) , elements(std::move(elements_)) { } unsigned ConstantDataArray::getNumElements() const { return elements.size(); } Constant *ConstantDataArray::getElementAsConstant(unsigned index) const { return cast(elements[index]); } ConstantDataVector::ConstantDataVector(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantDataVector) , elements(std::move(elements_)) { } unsigned ConstantDataVector::getNumElements() const { return elements.size(); } Constant *ConstantDataVector::getElementAsConstant(unsigned index) const { return cast(elements[index]); } ConstantAggregate::ConstantAggregate(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantAggregate) , elements(std::move(elements_)) { } unsigned ConstantAggregate::getNumOperands() const { return elements.size(); } Constant *ConstantAggregate::getOperand(unsigned index) const { return cast(elements[index]); } 
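// Editor's note: a worked example of the manual sign extension in
// APInt::getSExtValue() above, for width = 4 and raw value = 0b1010:
//   mask     = (1ull << 4) - 1           = 0b1111
//   sign_bit = ((value >> 3) & 1) != 0   = true
//   extended = ~mask                     = 0xFFFF'FFFF'FFFF'FFF0
//   result   = (value & mask) | extended = 0xFFFF'FFFF'FFFF'FFFA = -6
// getZExtValue() on the same value just masks, giving 10.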
ConstantExpr::ConstantExpr(unsigned opcode_, Type *type, Vector elements_) : Constant(type, ValueKind::ConstantExpr) , opcode(opcode_) , elements(std::move(elements_)) { } unsigned ConstantExpr::getOpcode() const { return opcode; } Constant *ConstantExpr::getOperand(unsigned int N) const { return cast(elements[N]); } unsigned ConstantExpr::getNumOperands() const { return unsigned(elements.size()); } GlobalVariable::GlobalVariable(Type *type, LinkageTypes linkage_, bool is_const_) : Constant(type, ValueKind::Global) , linkage(linkage_) , is_const(is_const_) { } GlobalVariable::LinkageTypes GlobalVariable::getLinkage() const { return linkage; } bool GlobalVariable::hasInitializer() const { return initializer != nullptr; } Constant *GlobalVariable::getInitializer() const { return initializer; } void GlobalVariable::set_initializer(Constant *value) { initializer = value; } bool GlobalVariable::isConstant() const { return is_const; } } // namespace LLVMBC ================================================ FILE: bc/value.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "data_structures.hpp" #include namespace LLVMBC { class Type; enum class ValueKind { Argument, Function, InstructionBase, ConstantBase, ConstantInt, ConstantFP, ConstantAggregateZero, ConstantPointerNull, ConstantAggregate, ConstantDataArray, ConstantDataVector, ConstantExpr, Undef, UnaryOperator, BinaryOperator, Call, CompareBase, FCmp, ICmp, BasicBlock, PHI, Cast, Select, ExtractValue, Alloca, GetElementPtr, Load, Store, AtomicRMW, AtomicCmpXchg, Return, Unreachable, Branch, Switch, Proxy, Global, ShuffleVector, ExtractElement, InsertElement, CompositeConstruct }; #define LLVMBC_DEFAULT_VALUE_KIND_IMPL \ static bool is_base_of_value_kind(ValueKind kind) \ { \ return get_value_kind() == kind; \ } class Value { public: Value(Type *type, ValueKind kind); Type *getType() const; ValueKind get_value_kind() const; void set_tween_id(uint64_t id); uint64_t get_tween_id() const; protected: Type *type; ValueKind kind; uint64_t tween_id = 0; }; class Argument : public Value { public: Argument(Type *type, unsigned argument_number); unsigned getArgNo() const; static constexpr ValueKind get_value_kind() { return ValueKind::Argument; } LLVMBC_DEFAULT_VALUE_KIND_IMPL private: unsigned argument_number; }; class APInt { public: APInt() = default; APInt(Type *type, uint64_t value); uint64_t getZExtValue() const; int64_t getSExtValue() const; // LLVMBC specific hack to make minprecision with signed ints work. // We need a sign-extended value which fortunately the DXIL emits, // but LLVM itself will mask off the bits for you. uint64_t get_raw_value() const; private: Type *type = nullptr; uint64_t value = 0; }; class APFloat { public: APFloat() = default; APFloat(Type *type, uint64_t value); float convertToFloat() const; double convertToDouble() const; APInt bitcastToAPInt() const; private: Type *type = nullptr; uint64_t value = 0; }; class Constant : public Value { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantBase; } Constant(Type *type, ValueKind kind); void set_integer(const APInt &apint); void set_float(const APFloat &apfloat); const APFloat &getValueAPF() const; const APInt &getUniqueInteger() const; static bool is_base_of_value_kind(ValueKind kind); private: APInt apint; APFloat apfloat; }; class ConstantInt : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantInt; } static ConstantInt *get(Type *type, uint64_t value); ConstantInt(Type *type, uint64_t value); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantFP : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantFP; } static ConstantFP *get(Type *type, uint64_t bits); ConstantFP(Type *type, uint64_t bits); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantAggregateZero : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantAggregateZero; } explicit ConstantAggregateZero(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantPointerNull : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantPointerNull; } explicit ConstantPointerNull(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantDataArray : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantDataArray; } ConstantDataArray(Type *type, Vector elements); unsigned getNumElements() const; Constant *getElementAsConstant(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class 
ConstantDataVector : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantDataVector; } ConstantDataVector(Type *type, Vector elements); unsigned getNumElements() const; Constant *getElementAsConstant(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class ConstantAggregate : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantAggregate; } ConstantAggregate(Type *type, Vector elements); unsigned getNumOperands() const; Constant *getOperand(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class ConstantExpr : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantExpr; } ConstantExpr(unsigned opcode, Type *type, Vector elements); unsigned getOpcode() const; unsigned getNumOperands() const; Constant *getOperand(unsigned N) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: unsigned opcode; Vector elements; }; class UndefValue : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Undef; } explicit UndefValue(Type *type); static UndefValue *get(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class GlobalVariable : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Global; } enum LinkageTypes { ExternalLinkage, InternalLinkage, AppendingLinkage }; explicit GlobalVariable(Type *type, LinkageTypes linkage, bool is_const); void set_initializer(Constant *value); Constant *getInitializer() const; bool hasInitializer() const; bool isConstant() const; LinkageTypes getLinkage() const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Constant *initializer = nullptr; LinkageTypes linkage; bool is_const; }; } // namespace LLVMBC ================================================ FILE: build_dxc.sh ================================================ #!/bin/bash PROFILE=Release if [ ! -z $1 ]; then PROFILE=$1 fi if [ ! -z $2 ]; then NPROC="--parallel $2" fi echo "Building DXC." mkdir -p external/dxc-build cd external/dxc-build # CLANG_FORMAT_EXE=OFF avoids a broken build where it expects clang-format to produce exact results for some dumb reason. cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -C ../DirectXShaderCompiler/cmake/caches/PredefinedParams.cmake -G Ninja -DSPIRV_WERROR=OFF -DCLANG_FORMAT_EXE=OFF -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ cmake --build . --config $PROFILE ${NPROC} ================================================ FILE: cfg_structurizer.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cfg_structurizer.hpp" #include "SpvBuilder.h" #include "logging.hpp" #include "node.hpp" #include "node_pool.hpp" #include "spirv_module.hpp" #include #include namespace dxil_spv { CFGStructurizer::CFGStructurizer(CFGNode *entry, CFGNodePool &pool_, SPIRVModule &module_) : entry_block(entry) , pool(pool_) , module(module_) { exit_block = pool.create_node(); exit_block->name = "EXIT"; } void CFGStructurizer::log_cfg_graphviz(const char *path) const { FILE *file = fopen(path, "w"); if (!file) { LOGE("Failed to open graphviz dump path: %s\n", path); return; } UnorderedMap node_to_id; uint32_t accum_id = 0; const auto get_node_id = [&](const CFGNode *node) -> uint32_t { auto itr = node_to_id.find(node); if (itr == node_to_id.end()) { const char *shape = nullptr; if (node->merge == MergeType::Loop) shape = "circle"; else if (node->merge == MergeType::Selection) shape = "triangle"; else shape = "box"; node_to_id[node] = ++accum_id; fprintf(file, "%u [label=\"%s\", shape=\"%s\"];\n", accum_id, node->name.c_str(), shape); return accum_id; } else return itr->second; }; fprintf(file, "digraph {\n"); for (auto index = forward_post_visit_order.size(); index; index--) { auto *node = forward_post_visit_order[index - 1]; switch (node->ir.terminator.type) { case Terminator::Type::Branch: fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.direct_block)); break; case Terminator::Type::Condition: fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.true_block)); fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.false_block)); break; case Terminator::Type::Switch: for (auto &c : node->ir.terminator.cases) fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(c.node)); break; default: break; } if (node->merge == MergeType::Loop) { if (node->pred_back_edge) fprintf(file, "%u -> %u [style=\"dotted\"];\n", get_node_id(node), get_node_id(node->pred_back_edge)); if (node->loop_merge_block) fprintf(file, "%u -> %u [style=\"dashed\"];\n", get_node_id(node), get_node_id(node->loop_merge_block)); } else if (node->merge == MergeType::Selection) { if (node->selection_merge_block) fprintf(file, "%u -> %u [style=\"dashed\"];\n", get_node_id(node), get_node_id(node->selection_merge_block)); } } fprintf(file, "}\n"); fclose(file); } void CFGStructurizer::log_cfg(const char *tag) const { LOGI("\n======== %s =========\n", tag); for (auto index = forward_post_visit_order.size(); index; index--) { auto *node = forward_post_visit_order[index - 1]; LOGI("%s:\n", node->name.c_str()); switch (node->ir.terminator.type) { case Terminator::Type::Branch: LOGI(" Branch -> %s\n", node->ir.terminator.direct_block->name.c_str()); break; case Terminator::Type::Condition: LOGI(" Cond -> %s | %s\n", node->ir.terminator.true_block->name.c_str(), node->ir.terminator.false_block->name.c_str()); break; case Terminator::Type::Return: LOGI(" Return\n"); break; case Terminator::Type::Unreachable: LOGI(" Unreachable\n"); break; case Terminator::Type::Switch: LOGI(" Switch\n"); for (auto &c : node->ir.terminator.cases) { if (c.is_default) LOGI(" Default -> %s\n", c.node->name.c_str()); else LOGI(" Case %u -> %s\n", c.value, c.node->name.c_str()); } break; case Terminator::Type::Kill: 
LOGI(" Kill\n"); break; } switch (node->merge) { case MergeType::Selection: LOGI(" SelectionMerge -> %s\n", node->selection_merge_block ? node->selection_merge_block->name.c_str() : "N/A"); break; case MergeType::Loop: LOGI(" LoopMerge -> %s\n", node->loop_merge_block ? node->loop_merge_block->name.c_str() : "N/A"); LOGI(" Continue -> %s\n", node->pred_back_edge ? node->pred_back_edge->name.c_str() : "N/A"); break; default: break; } LOGI("\n"); } LOGI("\n=====================\n"); } //#define PHI_DEBUG #ifdef PHI_DEBUG static void validate_phi(const PHI &phi) { auto incomings = phi.incoming; std::sort(incomings.begin(), incomings.end(), [](const IncomingValue &a, const IncomingValue &b) { return a.block < b.block; }); auto itr = std::unique(incomings.begin(), incomings.end(), [](const IncomingValue &a, const IncomingValue &b) { return a.block == b.block; }); if (itr != incomings.end()) abort(); } static void validate_phi(const Vector &phis) { for (auto &phi : phis) validate_phi(phi); } #else #define validate_phi(phi) ((void)0) #endif void CFGStructurizer::eliminate_node_link_preds_to_succ(CFGNode *node) { assert(node->succ.size() == 1 || node->succ.size() == 2); #ifdef PHI_DEBUG for (auto *succ : node->succ) validate_phi(succ->ir.phi); #endif Vector break_nodes; auto pred_copy = node->pred; auto succ_copy = node->succ; for (auto *pred : pred_copy) { auto *break_node = pool.create_node(); break_node->name = node->name + ".break." + pred->name; break_node->ir.terminator = node->ir.terminator; for (auto *succ : succ_copy) break_node->add_branch(succ); break_node->immediate_post_dominator = node->immediate_post_dominator; break_node->immediate_dominator = pred; pred->retarget_branch(node, break_node); break_nodes.push_back(break_node); for (auto &phi : node->ir.phi) { for (auto &incoming : phi.incoming) { if (incoming.block == pred) { incoming.block = break_node; // We have no opcodes in this block, but we may depend on a PHI variable to do conditional branch. if (phi.id == break_node->ir.terminator.conditional_id) break_node->ir.terminator.conditional_id = incoming.id; } } } } assert(node->pred.empty()); for (auto *succ : node->succ) { for (auto &phi : succ->ir.phi) { // Find incoming ID from the block we're splitting up. auto incoming_itr = std::find_if(phi.incoming.begin(), phi.incoming.end(), [&](const IncomingValue &incoming) { return incoming.block == node; }); assert(incoming_itr != phi.incoming.end()); spv::Id incoming_from_node = incoming_itr->id; phi.incoming.erase(incoming_itr); // Try to see if the ID is a PHI that was generated by this block. auto outgoing_itr = std::find_if(node->ir.phi.begin(), node->ir.phi.end(), [&](const PHI &phi) { return phi.id == incoming_from_node; }); if (outgoing_itr != node->ir.phi.end()) { // If it was then we need to split up the PHI node. The break block will serve as a proxy // incoming block instead. phi.incoming.insert(phi.incoming.end(), outgoing_itr->incoming.begin(), outgoing_itr->incoming.end()); validate_phi(succ->ir.phi); } else { // A plain value is passed down to succ, most likely a constant which lives at global scope. // We know this block does not generate this ID, so it must be either a value generated at global scope // (constant), or a value created by a block which dominates this node, // which also means it dominates all preds to this node. 
for (auto *break_pred : break_nodes) phi.incoming.push_back({ break_pred, incoming_from_node }); validate_phi(succ->ir.phi); } } // Remove any lingering pred, since node is now unreachable, and if we do more transforms without // recomputing CFG, we'll add impossible PHI inputs. auto erase_itr = std::find(succ->pred.begin(), succ->pred.end(), node); if (erase_itr != succ->pred.end()) succ->pred.erase(erase_itr); } node->ir.phi.clear(); } bool CFGStructurizer::cleanup_breaking_return_constructs() { unsigned post_dominating_returns = 0; CFGNode *split_candidate = nullptr; for (auto *node : forward_post_visit_order) { if (node->ir.terminator.type != Terminator::Type::Return) continue; // If this block is only serving to return, it's meaningless to merge. // It will only complicate the CFG. if (node->ir.operations.empty() && node->num_forward_preds() > 1 && !node->post_dominates_any_work()) { split_candidate = node; } else { // If we're actually post-dominating other blocks, the split candidate is relevant. for (auto *pred : node->pred) { if (node->post_dominates(pred)) { post_dominating_returns++; break; } } } } // Only bother if we have more than one return and at least another return that is actually post-dominating // work. Avoids potential false positives. if (!post_dominating_returns) return false; if (split_candidate) { auto preds = split_candidate->pred; for (auto *pred : preds) { auto *dummy_return = pool.create_node(); dummy_return->name = split_candidate->name + ".dup"; dummy_return->immediate_dominator = split_candidate->immediate_dominator; dummy_return->immediate_post_dominator = exit_block; dummy_return->forward_post_visit_order = split_candidate->forward_post_visit_order; dummy_return->backward_post_visit_order = split_candidate->backward_post_visit_order; dummy_return->ir.terminator.type = Terminator::Type::Return; pred->retarget_branch(split_candidate, dummy_return); } // Iterate until we are done. recompute_cfg(); return true; } return false; } bool CFGStructurizer::block_is_breaking_phi_construct(const CFGNode *node) const { // Only bother with blocks which don't do anything useful work. // The only opcodes they should have are PHI nodes and a (conditional) branch. if (!node->ir.operations.empty()) return false; if (node->pred.size() <= 1) return false; // Don't bother with anything that could be considered load bearing. if (node->post_dominates_perfect_structured_construct()) return false; // Anything related to loop/continue blocks, we don't bother with. if (node->succ_back_edge || node->pred_back_edge) return false; if (node->succ.size() == 1) { if (node->ir.terminator.type != Terminator::Type::Branch) return false; } else if (node->succ.size() == 2) { if (node->ir.terminator.type != Terminator::Type::Condition) return false; } else return false; for (auto *succ : node->succ) { if (node->dominates(succ)) return false; // Checks if either the merge block or successor is sensitive to PHI somehow. if (!ladder_chain_has_phi_dependencies(succ, node)) return false; } // This is a merge block candidate for a loop, don't split. // It will only confuse things where we'll need to re-merge the split blocks anyways. for (auto *pred : node->pred) if (pred->succ_back_edge) return false; // A more complicated case where we want the block to remain as a ladder block. 
auto *loop_header = get_innermost_loop_header_for(node); if (loop_header && loop_header->pred_back_edge && loop_header->dominates(node) && loop_header->pred_back_edge->succ.empty()) { bool merge_is_outside_loop = !query_reachability(*node, *loop_header->pred_back_edge); if (merge_is_outside_loop) { auto *header_pdom = loop_header->pred_back_edge->immediate_post_dominator; // We only want to avoid the split when this is a meaningful ladder. // If the paths all end up in the same merge anyway, it's safer to split. for (auto *df : node->dominance_frontier) if (df == header_pdom) return true; for (auto *pdf : node->post_dominance_frontier) { // We can't reach, but the PDF can. We're confident we're a loop exit. if (query_reachability(*pdf, *loop_header->pred_back_edge)) return false; } } } return true; } void CFGStructurizer::cleanup_breaking_phi_constructs() { bool did_work = false; // There might be cases where we have a common break block from different scopes which only serves to PHI together some values // before actually breaking, and passing that PHI node on to the actual break block. // This causes problems because this looks very much like a merge, but it is actually not and forces validation errors. // Another case is where the succ block takes PHI nodes from the breaking block only, // which is relevant if only constants are somehow used in the PHI construct. for (size_t i = forward_post_visit_order.size(); i; i--) { auto *node = forward_post_visit_order[i - 1]; if (block_is_breaking_phi_construct(node)) { eliminate_node_link_preds_to_succ(node); did_work = true; } } if (did_work) recompute_cfg(); } static void scrub_rov_begin_lock(CFGNode *node, bool preserve_first_begin) { auto begin_itr = node->ir.operations.begin(); if (preserve_first_begin) { begin_itr = std::find_if(node->ir.operations.begin(), node->ir.operations.end(), [](const Operation *op) { return op->op == spv::OpBeginInvocationInterlockEXT; }); assert(begin_itr != node->ir.operations.end()); ++begin_itr; } auto itr = std::remove_if(begin_itr, node->ir.operations.end(), [](const Operation *op) { return op->op == spv::OpBeginInvocationInterlockEXT; }); node->ir.operations.erase(itr, node->ir.operations.end()); } static void scrub_rov_end_lock(CFGNode *node, bool preserve_last_end) { auto end_itr = node->ir.operations.end(); if (preserve_last_end) { for (size_t i = node->ir.operations.size(); i; i--) { size_t index = i - 1; auto &op = node->ir.operations[index]; if (op->op == spv::OpEndInvocationInterlockEXT) { end_itr = node->ir.operations.begin() + index; break; } } } auto itr = std::remove_if(node->ir.operations.begin(), end_itr, [](const Operation *op) { return op->op == spv::OpEndInvocationInterlockEXT; }); node->ir.operations.erase(itr, end_itr); } static void scrub_rov_lock_regions(CFGNode *node, bool preserve_first_begin, bool preserve_last_end) { scrub_rov_begin_lock(node, preserve_first_begin); scrub_rov_end_lock(node, preserve_last_end); } bool CFGStructurizer::find_single_entry_exit_lock_region( CFGNode *&idom, CFGNode *&pdom, const Vector &rov_blocks) { // If the lock region has multiple instances, i.e. a loop, give up right away, unless the construct is simple // and we can trivially do: // begin(); for(;;) {} end(); // For this to work, all ROV blocks must be contained by one loop. The must be a trivial input branch to the loop // header, and trivial exit out of the loop, i.e. one loop exit which is covered by the continue block. auto *outermost_loop_header = idom ? 
const_cast(get_innermost_loop_header_for(entry_block, idom)) : nullptr; while (outermost_loop_header && outermost_loop_header != entry_block) { auto *innermost_loop_header = const_cast( get_innermost_loop_header_for(entry_block, outermost_loop_header->immediate_dominator)); // Stop right before we hit the entry block. if (innermost_loop_header && innermost_loop_header != entry_block) outermost_loop_header = innermost_loop_header; else break; } if (idom && outermost_loop_header != entry_block) { // First, all ROV blocks must be inside the loop construct. for (auto *rov : rov_blocks) { if (!outermost_loop_header->dominates(rov) || !query_reachability(*rov, *outermost_loop_header->pred_back_edge)) { // Cannot promote directly. Can only promote if idom is entered once. return execution_path_is_single_entry_and_dominates_exit(idom, pdom); } } idom = outermost_loop_header; auto analysis = analyze_loop(outermost_loop_header); auto merge = analyze_loop_merge(outermost_loop_header, analysis); if (!merge.merge || !merge.dominated_merge || merge.infinite_continue_ladder || merge.merge != merge.dominated_merge) { return false; } else { pdom = merge.merge; } // We must insert the lock before entering loop. // This only works if we have exactly one pred and that pred directly branches to us. if (idom->pred.size() == 1 && idom->pred.front()->ir.terminator.type == Terminator::Type::Branch) idom = idom->pred.front(); else return false; } return true; } bool CFGStructurizer::execution_path_is_single_entry_and_dominates_exit(CFGNode *idom, CFGNode *pdom) { if (!idom->dominates_all_reachable_exits()) return false; pdom = CFGNode::find_common_post_dominator(pdom, idom); bool internal_early_return = !pdom || pdom->immediate_post_dominator == pdom; if (internal_early_return) return false; // If we're dominating all reachable exits despite being inside a loop, it's okay to use ROV as-is. // We have proven that this path will only be executed once per thread. // We will have to make sure that this exit path doesn't loop itself. // Just prove this by making sure there are no back-edges on the path from idom to pdom. // If there are back-edges that loop back to an earlier header, that is covered by dominates_all_reachable_exits. if (idom->pred_back_edge || !idom->dominates(pdom)) return false; while (pdom != idom) { if (pdom->pred_back_edge) return false; pdom = pdom->immediate_dominator; } return true; } void CFGStructurizer::flatten_subgroup_shuffles() { recompute_cfg(); // Look for cases where shuffles happen inside small branches. // This comes up due to HLSL's short-cicruit rules. for (auto *n : forward_post_visit_order) { // Only care about blocks which don't dominate anything. if (n->succ.size() != 1 || n->dominance_frontier.size() != 1 || n->dominance_frontier.front() != n->succ.front()) continue; if (n->pred.size() != 1) continue; if (!n->pred.front()->dominates(n->succ.front())) continue; if (n->pred.front()->succ.size() != 2) continue; // There's a limit to how much we want to peephole. if (n->ir.operations.size() > 4) continue; // We don't want to hoist if both sides of the branch have meaningful work associated with them. 
auto *succ = n->succ.front(); auto *pred = n->pred.front(); auto *sibling0 = pred->succ[0]; auto *sibling1 = pred->succ[1]; if (sibling0 != succ && sibling0 != n && !sibling0->ir.operations.empty()) continue; if (sibling1 != succ && sibling1 != n && !sibling1->ir.operations.empty()) continue; // Now we've detected: // if (blah) { a = shuffle(); } phi(a); bool has_dubious_shuffle = false; for (auto *op : n->ir.operations) { if (op->op == spv::OpGroupNonUniformShuffle || op->op == spv::OpGroupNonUniformBroadcast) { for (auto &phi : n->succ.front()->ir.phi) { for (auto &incoming : phi.incoming) { if (incoming.id == op->id) { has_dubious_shuffle = true; goto out; } } } } } out: if (has_dubious_shuffle) { // Now the question is if it's safe to do this. There can be nothing control dependent (except for shuffles). for (auto *op : n->ir.operations) { if (op->op == spv::OpGroupNonUniformShuffle || op->op == spv::OpGroupNonUniformBroadcast) continue; if (op->op == spv::OpLoad) { // Only allow loads if it's loading from plain OpVariables. // Hoisting a buffer read is not acceptable. if (!module.get_builder().hasDecoration(op->arguments[0], spv::DecorationBuiltIn)) { has_dubious_shuffle = false; break; } } if (SPIRVModule::opcode_is_control_dependent(op->op) || op->id == 0 || SPIRVModule::opcode_has_side_effect_and_result(op->op)) { has_dubious_shuffle = false; break; } } } if (has_dubious_shuffle) { for (auto *op : n->ir.operations) n->pred.front()->ir.operations.push_back(op); n->ir.operations.clear(); } } } void CFGStructurizer::rewrite_auto_group_shared_barrier() { recompute_cfg(); enum class Kind { None, Load, Store, Atomic }; struct Block { CFGNode *node; const CFGNode *innermost_loop; Kind pre_kind; Kind post_kind; }; // In linear traversal order, find all BBs that use group shared. Vector shared_blocks; for (size_t i = forward_post_visit_order.size(); i; i--) { auto *node = forward_post_visit_order[i - 1]; for (auto *op : node->ir.operations) { if ((op->flags & Operation::AutoGroupSharedBarrier) != 0) { shared_blocks.push_back({ node, get_innermost_loop_header_for(node), Kind::None, Kind::None }); break; } } } // Deal with intra-BB hazards. for (auto &block : shared_blocks) { Kind pending = Kind::None; // If we're the first BB to access shared, no need for a post block. // Similar for the last block. // Loops can complicate this analysis, but ... eh. // This is a workaround, not required by spec or anything. 
		for (auto *op : block.node->ir.operations)
		{
			if ((op->flags & Operation::AutoGroupSharedBarrier) != 0)
			{
				if (op->op == spv::OpLoad || op->op == spv::PseudoOpMaskedLoad)
				{
					if (pending != Kind::Load && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Load;
				}
				else if (op->op == spv::OpStore || op->op == spv::PseudoOpMaskedStore)
				{
					if (pending != Kind::Store && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Store;
				}
				else
				{
					if (pending != Kind::Atomic && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Atomic;
				}

				if (block.pre_kind == Kind::None)
					block.pre_kind = pending;
			}
		}

		block.post_kind = pending;
	}

	for (size_t i = 0; i < shared_blocks.size(); i++)
	{
		auto &first = shared_blocks[i];
		for (size_t j = i + 1; j < shared_blocks.size(); j++)
		{
			auto &second = shared_blocks[j];
			if (!query_reachability(*first.node, *second.node))
				continue;

			if (first.post_kind != second.pre_kind)
			{
				// Find an intermediate block which:
				// - post-dominates the first
				// - dominates the second
				// - has the maximal number of invocations
				// The subgroup barrier should be run with as many threads as possible.
				if (second.node->post_dominates(first.node))
					second.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
				else if (first.node->dominates(second.node))
					first.node->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
				else
				{
					// Try to find some intermediate node. If we cannot find it, just yolo in a barrier
					// somewhere. This is just a workaround, so if it doesn't work 100%, it's not a big deal.
					auto *pdom = first.node->immediate_post_dominator;
					while (pdom && query_reachability(*pdom, *second.node) && !pdom->dominates(second.node) &&
					       pdom->immediate_post_dominator && pdom->immediate_post_dominator != pdom)
					{
						pdom = pdom->immediate_post_dominator;
					}

					if (pdom && pdom != second.node)
					{
						if (pdom->ir.operations.empty())
						{
							auto *nop = module.allocate_op(spv::OpNop);
							nop->flags |= Operation::SubgroupSyncPost;
							pdom->ir.operations.push_back(nop);
						}
						else
							pdom->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
					}
					else if (pdom == second.node)
					{
						second.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
					}
				}

				// We've added appropriate barriers for this node now.
				second.pre_kind = Kind::None;
			}

			break;
		}

		// Analyze re-entrant code. We may depend on memory coming from an earlier loop iteration.
		if (first.pre_kind != Kind::None && first.innermost_loop != entry_block &&
		    first.innermost_loop->pred_back_edge)
		{
			bool has_complex_dependency = false;

			// Other blocks within the loop may require a dependency.
			for (size_t j = i + 1; j < shared_blocks.size() && !has_complex_dependency; j++)
			{
				if (query_reachability(*shared_blocks[j].node, *first.innermost_loop->pred_back_edge))
				{
					first.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
					has_complex_dependency = true;
				}
			}

			if (!has_complex_dependency && first.pre_kind != first.post_kind)
			{
				// Self-dependency within the BB.
				first.node->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
			}
		}
	}
}

bool CFGStructurizer::rewrite_rov_lock_region()
{
	recompute_cfg();

	// First, find all BBs that use ROV.
	Vector<CFGNode *> rov_blocks;
	for (auto *node : forward_post_visit_order)
	{
		for (auto &op : node->ir.operations)
		{
			if (op->op == spv::OpBeginInvocationInterlockEXT)
			{
				rov_blocks.push_back(node);
				break;
			}
		}
	}

	// If we declare ROVs but never actually use them ... *shrug*
	if (rov_blocks.empty())
		return true;

	// Rules: OpBegin and OpEnd must be dynamically called exactly once.
	// To simplify, we want to only emit one begin and one end that covers the entire shader.
	// Usually ROV access is constrained to a single BB as a simple case.
	// The simple BB case fails with control flow, e.g. a loop or conditional. In this case we must widen the range
	// of the lock such that: end post-dominates begin, and begin post-dominates entry.
	// If we cannot make this work, flag as non-trivial and wrap the entire shader in a big lock.
	auto *idom = rov_blocks.front();
	for (size_t i = 1; i < rov_blocks.size() && idom; i++)
		idom = CFGNode::find_common_dominator(idom, rov_blocks[i]);

	// Stretch scope as long as we don't post-dominate entry.
	while (idom && idom != entry_block && !idom->post_dominates(entry_block))
		idom = idom->immediate_dominator;

	auto *pdom = find_common_post_dominator(rov_blocks);

	if (!pdom || !idom || !find_single_entry_exit_lock_region(idom, pdom, rov_blocks) || !idom->dominates(pdom))
	{
		idom = nullptr;
		pdom = nullptr;
	}

	// Stretch post-dominator if we need to.
	if (idom && pdom)
		pdom = CFGNode::find_common_post_dominator(pdom, idom);

	bool internal_early_return = !pdom || pdom->immediate_post_dominator == pdom;

	// Non-trivial case.
	if (!idom || !pdom || internal_early_return)
	{
		for (auto *node : rov_blocks)
			scrub_rov_lock_regions(node, false, false);
		return false;
	}

	bool begin_block_has_lock = std::find(rov_blocks.begin(), rov_blocks.end(), idom) != rov_blocks.end();
	bool end_block_has_lock = std::find(rov_blocks.begin(), rov_blocks.end(), pdom) != rov_blocks.end();

	for (auto *node : rov_blocks)
		scrub_rov_lock_regions(node, node == idom, node == pdom);

	if (!begin_block_has_lock)
		idom->ir.operations.push_back(module.allocate_op(spv::OpBeginInvocationInterlockEXT));
	if (!end_block_has_lock)
		pdom->ir.operations.insert(pdom->ir.operations.begin(), module.allocate_op(spv::OpEndInvocationInterlockEXT));

	return true;
}

void CFGStructurizer::rewrite_multiple_back_edges()
{
	reset_traversal();
	visit_for_back_edge_analysis(*entry_block);
}

void CFGStructurizer::sink_ssa_constructs()
{
	sink_ssa_constructs_run(true);
	sink_ssa_constructs_run(false);
}

void CFGStructurizer::sink_ssa_constructs_run(bool dry_run)
{
	// First, propagate sinkability state to any operation that uses a sinkable SSA.
	// If an SSA expression is used in a BB, but that use of the SSA can be sunk, we need to
	// sink everything as a group.
	Vector<spv::Id> sinkable_ops;

	struct RewriteState
	{
		CFGNode *consumed_block;
		Operation *op;
	};
	UnorderedMap<spv::Id, RewriteState> sinks;

	for (auto *n : forward_post_visit_order)
	{
		sinkable_ops.clear();
		auto &ops = n->ir.operations;

		for (auto *op : ops)
		{
			if ((op->flags & Operation::SinkableBit) != 0)
			{
				sinkable_ops.push_back(op->id);
				sinks[op->id] = { nullptr, op };
			}
			else if (op->id && !SPIRVModule::opcode_is_control_dependent(op->op) &&
			         !SPIRVModule::opcode_has_side_effect_and_result(op->op))
			{
				// We cannot sink any opcode which is control dependent, or has side effects.
				for (uint32_t i = 0; i < op->num_arguments; i++)
				{
					if ((op->literal_mask & (1u << i)) != 0)
						continue;

					spv::Id consumed_id = op->arguments[i];
					if (std::find(sinkable_ops.begin(), sinkable_ops.end(), consumed_id) != sinkable_ops.end())
					{
						sinkable_ops.push_back(op->id);
						op->flags |= Operation::DependencySinkableBit;
						sinks[op->id] = { nullptr, op };
						break;
					}
				}
			}
			else if (op->op == spv::OpControlBarrier || op->op == spv::OpMemoryBarrier)
			{
				// We cannot sink beyond this barrier. Invalidate every sinkable op we saw so far.
				for (spv::Id id : sinkable_ops)
				{
					auto *op_ptr = sinks[id].op;
					assert(op_ptr);
					op_ptr->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
				}
				sinkable_ops.clear();
			}
		}
	}

	// If an expression is used as a PHI input assume we cannot sink.
	// It gets a bit awkward to deal with this, and it's not required for this workaround pass.
	for (auto *n : forward_post_visit_order)
	{
		for (auto &phi : n->ir.phi)
		{
			for (auto &incoming : phi.incoming)
			{
				auto itr = sinks.find(incoming.id);
				if (itr != sinks.end())
				{
					auto *op_ptr = itr->second.op;
					assert(op_ptr);
					op_ptr->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
				}
			}
		}
	}

	const auto consume_id = [&](spv::Id consumed_id, CFGNode *n) {
		auto itr = sinks.find(consumed_id);
		if (itr != sinks.end())
		{
			if (!itr->second.consumed_block)
				itr->second.consumed_block = n;
			else if (itr->second.consumed_block != n)
				itr->second.op->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
		}
	};

	const auto path_is_reorderable = [&](const CFGNode *src, const CFGNode *dst) {
		// There cannot be any control or memory barriers along the way, or we have to be conservative.

		// There is absolutely no point in sinking if dst ends up post-dominating src anyway.
		// We cannot avoid any bug from happening.
		if (dst->post_dominates(src))
			return false;

		// Never sink into a loop.
		if (dst->pred_back_edge)
			return false;

		// Could deal with multiple preds, but we mostly just care about trivial sinks.
		if (dst->pred.size() > 1)
			return false;

		dst = dst->immediate_dominator;
		while (src != dst)
		{
			if (dst->pred.size() > 1 || dst->pred_back_edge)
				return false;
			for (auto *op : dst->ir.operations)
				if (op->op == spv::OpControlBarrier || op->op == spv::OpMemoryBarrier)
					return false;
			dst = dst->pred.front();
		}

		// We reached src, and we validated that block already when deciding on what is sinkable or not, so we're good.
		return true;
	};

	// Walk all instructions in reverse order.
	// We can sink an instruction if:
	// - An ID was only consumed in a BB != generating BB.
	// The consumed BB must be unique for us to consider it for simplicity.
	for (auto *n : forward_post_visit_order)
	{
		if (n->ir.terminator.type == Terminator::Type::Condition || n->ir.terminator.type == Terminator::Type::Switch)
		{
			consume_id(n->ir.terminator.conditional_id, n);
		}
		else if (n->ir.terminator.type == Terminator::Type::Return && n->ir.terminator.return_value != 0)
		{
			consume_id(n->ir.terminator.return_value, n);
		}

		auto &ops = n->ir.operations;
		for (size_t i = ops.size(); i; i--)
		{
			auto *op = ops[i - 1];
			auto *target_block = n;

			if (op->id && (op->flags & (Operation::SinkableBit | Operation::DependencySinkableBit)) != 0)
			{
				auto sink_itr = sinks.find(op->id);
				if (sink_itr != sinks.end() && sink_itr->second.consumed_block &&
				    sink_itr->second.consumed_block != n &&
				    path_is_reorderable(n, sink_itr->second.consumed_block))
				{
					// Move the operation to the beginning of the consumed block instead.
					target_block = sink_itr->second.consumed_block;

					// Don't actually move the instruction until we have confirmed the entire chain can be sunk,
					// otherwise this exercise is meaningless.
					if (!dry_run)
					{
						target_block->ir.operations.insert(target_block->ir.operations.begin(), op);
						ops.erase(ops.begin() + int(i - 1));
					}
				}
				else
				{
					// This failed to sink. Remember this for the next run.
					op->flags &= ~Operation::SinkableBit;
				}
			}

			// Mark uses after we have sunk the instruction. This allows us to sink a chain of SSA instructions.
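			// (E.g., illustratively: if %a is Sinkable and %b = OpIAdd %a %c became
			// DependencySinkable, sinking %b first records %b's new block as %a's
			// consumer, so %a can follow on this same reverse walk.)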
			for (uint32_t j = 0; j < op->num_arguments; j++)
				if ((op->literal_mask & (1u << j)) == 0)
					consume_id(op->arguments[j], target_block);
		}
	}

	if (dry_run)
		for (auto *n : forward_post_visit_order)
			for (auto *op : n->ir.operations)
				op->flags &= ~Operation::DependencySinkableBit;
}

void CFGStructurizer::propagate_branch_control_hints()
{
	for (auto *n : forward_post_visit_order)
	{
		if (n->pred_back_edge)
		{
			if (n->pred_back_edge->ir.terminator.force_loop)
				n->ir.merge_info.loop_control_mask = spv::LoopControlDontUnrollMask;
			else if (n->pred_back_edge->ir.terminator.force_unroll)
				n->ir.merge_info.loop_control_mask = spv::LoopControlUnrollMask;
		}

		if (n->ir.terminator.type == Terminator::Type::Condition)
		{
			if (n->ir.terminator.force_flatten)
				n->ir.merge_info.selection_control_mask = spv::SelectionControlFlattenMask;
			else if (n->ir.terminator.force_branch)
				n->ir.merge_info.selection_control_mask = spv::SelectionControlDontFlattenMask;
		}

		// Both are possible if a selection construct is also a loop header.
	}
}

void CFGStructurizer::remove_unused_ssa()
{
	UnorderedSet<spv::Id> removed_ids;
	UnorderedSet<spv::Id> used_ids;

	for (auto *node : forward_post_visit_order)
	{
		for (auto &phi : node->ir.phi)
			for (auto &incoming : phi.incoming)
				used_ids.insert(incoming.id);

		for (auto *op : node->ir.operations)
			for (unsigned i = 0; i < op->num_arguments; i++)
				if ((op->literal_mask & (1u << i)) == 0)
					used_ids.insert(op->arguments[i]);

		if (node->ir.terminator.conditional_id)
			used_ids.insert(node->ir.terminator.conditional_id);
		if (node->ir.terminator.type == Terminator::Type::Return && node->ir.terminator.return_value != 0)
		{
			used_ids.insert(node->ir.terminator.return_value);
		}
	}

	for (auto *node : forward_post_visit_order)
	{
		node->ir.phi.erase(std::remove_if(node->ir.phi.begin(), node->ir.phi.end(),
		                                  [&](const PHI &phi) { return used_ids.count(phi.id) == 0; }),
		                   node->ir.phi.end());

		node->ir.operations.erase(
		    std::remove_if(node->ir.operations.begin(), node->ir.operations.end(),
		                   [&](const Operation *op) {
			                   bool ret = op->id != 0 && !SPIRVModule::opcode_has_side_effect_and_result(op->op) &&
			                              used_ids.count(op->id) == 0;
			                   if (ret)
				                   removed_ids.insert(op->id);
			                   return ret;
		                   }),
		    node->ir.operations.end());
	}

	module.get_builder().removeDecorations(removed_ids);
}

bool CFGStructurizer::rewrite_impossible_back_edges()
{
	bool did_rewrite = false;

	for (auto *node : forward_post_visit_order)
	{
		if (!node->succ_back_edge)
			continue;

		// Make sure that the continue block in question branches to the innermost loop header.
		// If this is not the case, it is not a valid structured CFG.
		// In unstructured CFG, as long as the continue block cannot reach the back-edge of any inner loop constructs,
		// it's technically not considered part of their loops, even if the loops dominate it.
		// Utter nonsense ... >_<
		// The only viable solution is to transpose out the continue block and use ladder selection
		// to resolve the control flow.
		auto *header = get_innermost_loop_header_for(node);
		if (header == node->succ_back_edge)
			continue;

		// Make sure that we're in valid unstructured control flow. Our node cannot reach any back edge on the way,
		// meaning it's okay to transpose code. If the continue block can reach us, it means we're already
		// outside the loop, stop any attempt to transpose.
		const auto validate_header_suitability = [this, node](const CFGNode *header) {
			return !query_reachability(*node, *header->pred_back_edge) &&
			       !query_reachability(*header->pred_back_edge, *node);
		};

		// Find a more appropriate place to put it.
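		// (Illustrative: an outer loop header H1 containing an inner loop H2, with a
		// node dominated by H2 whose back edge targets H1, is exactly this case; the
		// continue edge must be transposed out of H2's scope.)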
		// We want to rewrite the flow so that the continue block lives outside any inner scopes.
		// The succ of the outer continue block is appropriate.
		const CFGNode *next_header;
		while ((next_header = get_innermost_loop_header_for(header->immediate_dominator)) != node->succ_back_edge &&
		       validate_header_suitability(next_header))
		{
			header = next_header;
		}

		if (next_header != node->succ_back_edge || !validate_header_suitability(header))
			continue;

		auto *outer_continue = header->pred_back_edge;

		// The outer continue must have a normal succ.
		if (outer_continue->succ.size() != 1)
			continue;

		// This succ is now in the loop scope of node->succ_back_edge. We can do the continue construct here.
		auto *succ = outer_continue->succ.front();
		auto *ladder = create_helper_pred_block(succ);
		auto orig_preds = node->pred;

		traverse_dominated_blocks_and_rewrite_branch(node->succ_back_edge, node, ladder);
		rewrite_ladder_conditional_branch_from_incoming_blocks(
		    ladder, node, succ,
		    [&orig_preds](const CFGNode *n) {
			    return std::find(orig_preds.begin(), orig_preds.end(), n) != orig_preds.end();
		    },
		    "transpose_continue_phi");

		did_rewrite = true;
		break;
	}

	if (did_rewrite)
		recompute_cfg();

	return did_rewrite;
}

bool CFGStructurizer::run_trivial()
{
	recompute_cfg();
	sink_ssa_constructs();
	propagate_branch_control_hints();

	// Remove unused SSA ops in this path.
	remove_unused_ssa();
	return true;
}

bool CFGStructurizer::run()
{
	String graphviz_path;
	if (const char *env = getenv("DXIL_SPIRV_GRAPHVIZ_PATH"))
		graphviz_path = env;

	// We make the assumption during traversal that there is only one back edge.
	// Fix this up here.
	rewrite_multiple_back_edges();

	//log_cfg("Input state");

	if (!graphviz_path.empty())
	{
		reset_traversal();
		visit(*entry_block);
		auto graphviz_input = graphviz_path + ".input";
		log_cfg_graphviz(graphviz_input.c_str());
	}

	recompute_cfg();

	sink_ssa_constructs();
	propagate_branch_control_hints();
	cleanup_breaking_phi_constructs();

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".phi-split";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	while (cleanup_breaking_return_constructs())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".break-return";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	create_continue_block_ladders();

	while (serialize_interleaved_early_returns())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize-early-return";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	while (serialize_interleaved_merge_scopes_aggressive())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize-aggressive";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	while (serialize_interleaved_merge_scopes())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	split_merge_scopes();
	recompute_cfg();
	//log_cfg("Split merge scopes");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".split";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	// We will have generated lots of ladder blocks
	// which might cause issues with further analysis, so
	// nuke them as required.
	eliminate_degenerate_blocks();

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".eliminate0";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	// Similar to cleanup_breaking_phi_constructs() in spirit,
	// but here we are forced to duplicate code blocks to make it work.
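	// (Illustratively: a breaking merge block with two or more preds that still
	// carries real code gets cloned once per pred by duplicate_node(), giving each
	// clone a single, unambiguous path to the merge target.)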
	duplicate_impossible_merge_constructs();
	//log_cfg("Split impossible merges");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".duplicate";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	while (rewrite_transposed_loops())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".transpose-loop-rewrite";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	// If there are back-edges that punch through multiple loop headers, fix this up.
	while (rewrite_impossible_back_edges())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".impossible-continue";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	//LOGI("=== Structurize pass ===\n");
	while (structurize(0))
	{
		recompute_cfg();
		if (!graphviz_path.empty())
		{
			auto graphviz_final = graphviz_path + ".partial-struct0";
			log_cfg_graphviz(graphviz_final.c_str());
		}
	}

	update_structured_loop_merge_targets();
	//log_cfg("Structurize pass 0");

	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".struct0";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	// We will have generated lots of ladder blocks
	// which might cause issues with further analysis, so
	// nuke them as required.
	eliminate_degenerate_blocks();
	//log_cfg("Split merge scopes");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".eliminate1";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	//LOGI("=== Structurize pass ===\n");
	structurize(1);

	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".struct1";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	bool need_restructure = false;
	while (rewrite_invalid_loop_breaks())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_final = graphviz_path + ".loop-break-rewrite";
			log_cfg_graphviz(graphviz_final.c_str());
		}
		need_restructure = true;
	}

	if (need_restructure)
	{
		// Need to redo the final structurization pass if we end up here.
		structurize(1);
	}

	//log_cfg("Final");
	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".final";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	insert_phi();

	return true;
}

CFGNode *CFGStructurizer::get_entry_block() const
{
	return entry_block;
}

static bool block_is_control_dependent(const CFGNode *node)
{
	for (auto *op : node->ir.operations)
		if (SPIRVModule::opcode_is_control_dependent(op->op))
			return true;
	return false;
}

bool CFGStructurizer::continue_block_can_merge(CFGNode *node) const
{
	const CFGNode *pred_candidate = nullptr;
	auto *header = node->succ_back_edge;

	// This algorithm is very arbitrary and should be seen as a nasty heuristic which solves real shaders
	// we see in the wild. It's probably safe to block continue merge in far more cases than this, but we
	// want to be maximally convergent as often as we can.

	for (auto *pred : node->pred)
	{
		// This is the merge block of another inner loop, we really need an intermediate merge.
		if (pred->succ_back_edge && header != pred->succ_back_edge && header->dominates(pred->succ_back_edge))
			return true;
	}

	// Plain continue block that does nothing useful. No point merging this.
	// A continue block's succ is sometimes used to aid analysis and simplify other passes,
	// use terminator here explicitly.
	if (node->ir.operations.empty() && node->ir.terminator.type == Terminator::Type::Branch)
		return false;

	if (header->ir.terminator.type == Terminator::Type::Switch)
	{
		// If the loop header is also a switch statement, there can be some nasty edge cases.
		// We likely never intend for the continue block to be maximally convergent here
		// if the natural merge block is not the continue block.
		auto *merge = find_common_post_dominator(header->succ);
		auto *natural_merge = find_natural_switch_merge_block(header, merge);
		if (merge == node && natural_merge != merge)
			return false;
	}

	for (auto *pred : node->pred)
	{
		// If we have a situation where a continue block has a pred which is itself a selection merge target, that
		// block is the merge target where we follow maximum convergence.
		// The candidate must be inside the loop body and not the header itself.
		// Neither continue block nor merge target have any dominance relationship.
		if (pred->num_forward_preds() >= 2 && pred->succ.size() >= 2 &&
		    header != pred && !pred->dominates(node) && !node->post_dominates(pred))
		{
			// If execution does not merge up right at the natural break block,
			// things will get very complicated.
			// In practice, we can handle merges as long as the candidate just breaks out normally.
			// If not, we have to introduce ladder breaking and this is (almost) impossible to get right.
			auto *common_post_dominator = CFGNode::find_common_post_dominator(node, pred);
			if (common_post_dominator &&
			    std::find(node->succ.begin(), node->succ.end(), common_post_dominator) == node->succ.end())
			{
				pred_candidate = pred;
			}
		}

		// If we have a situation where a switch block inside our loop uses the continue block
		// as a continue target, it's important that we keep this block as a continue block,
		// otherwise, it will complicate the switch block greatly.
		if (pred->ir.terminator.type == Terminator::Type::Switch && !node->post_dominates(pred))
			return false;
	}

	// No obviously nasty case to handle, probably safe to let the algorithm do its thing ...
	if (!pred_candidate)
		return true;

	// Need to find another escape edge which is neither header nor the candidate.
	bool found_another_escape_edge = false;
	for (auto *pred : node->pred)
	{
		if (pred != header && pred != pred_candidate && !pred->dominates(node))
		{
			found_another_escape_edge = true;
			break;
		}
	}

	// If we have yet another escape edge, we probably cannot merge to continue ...
	return !found_another_escape_edge;
}

void CFGStructurizer::create_continue_block_ladders()
{
	// It does not seem to be legal to merge directly to continue blocks.
	// To make it possible to merge execution, we need to create a ladder block which we can merge to.
	// There are certain scenarios where it is impossible to merge to a continue block.
	// In this case, we will abandon maximum convergence and use the continue block as a "break"-like target.
	bool need_recompute_cfg = false;
	for (auto *node : forward_post_visit_order)
	{
		if (block_is_plain_continue(node) && continue_block_can_merge(node))
		{
			//LOGI("Creating helper pred block for continue block: %s\n", node->name.c_str());
			create_helper_pred_block(node);
			need_recompute_cfg = true;
		}
	}

	if (need_recompute_cfg)
		recompute_cfg();
}

void CFGStructurizer::update_structured_loop_merge_targets()
{
	// First, we need to do this before recomputing the CFG, since we lose
	// normal loop merge targets when recomputing.
	structured_loop_merge_targets.clear();
	for (auto *node : forward_post_visit_order)
	{
		if (node->loop_merge_block)
			structured_loop_merge_targets.insert(node->loop_merge_block);
		if (node->loop_ladder_block)
			structured_loop_merge_targets.insert(node->loop_ladder_block);
	}

	recompute_cfg();

	// Make sure we include merge blocks which are frozen merge targets in ladder blocks, which
	// were not included in the post visit order yet.
	for (auto *node : forward_post_visit_order)
	{
		if (node->loop_merge_block)
			structured_loop_merge_targets.insert(node->loop_merge_block);
		if (node->loop_ladder_block)
			structured_loop_merge_targets.insert(node->loop_ladder_block);
	}
}

static spv::Id get_remapped_id_for_duplicated_block(spv::Id id, const UnorderedMap<spv::Id, spv::Id> &remap)
{
	auto itr = remap.find(id);
	if (itr != remap.end())
		return itr->second;
	else
		return id;
}

Operation *CFGStructurizer::duplicate_op(Operation *op, UnorderedMap<spv::Id, spv::Id> &id_remap)
{
	Operation *duplicated_op;
	if (op->id)
		duplicated_op = module.allocate_op(op->op, module.allocate_id(), op->type_id);
	else
		duplicated_op = module.allocate_op(op->op);

	for (unsigned i = 0; i < op->num_arguments; i++)
	{
		if (op->literal_mask & (1u << i))
			duplicated_op->add_literal(op->arguments[i]);
		else
			duplicated_op->add_id(get_remapped_id_for_duplicated_block(op->arguments[i], id_remap));
	}

	if (op->id)
		id_remap[op->id] = duplicated_op->id;

	return duplicated_op;
}

bool CFGStructurizer::can_duplicate_phis(const CFGNode *node)
{
	// If we want to duplicate nodes, we cannot do so in complicated scenarios where
	// we need to resolve PHIs. For example, if a node is split, the split nodes might have to
	// insert PHI nodes covering the subset of nodes which can reach each split.
	// This gets very hairy, very quickly.
	// To check this, ensure that the node we want to split does not require any complex PHI handling.

	// First, validate that we can even find incoming values properly.
	for (auto *pred : node->pred)
	{
		for (auto &phi : node->ir.phi)
		{
			auto itr = find_incoming_value(pred, phi.incoming);
			if (itr == phi.incoming.end())
				return false;
		}
	}

	// Then, make sure that every incoming value dominates at least one pred of node.
	// This way, we know that we don't need complicated PHI frontier merges along the way.
	for (auto &phi : node->ir.phi)
	{
		for (auto &incoming : phi.incoming)
		{
			bool dominates_at_least_one_pred = false;
			for (auto *pred : node->pred)
			{
				if (incoming.block->dominates(pred))
				{
					dominates_at_least_one_pred = true;
					break;
				}
			}

			if (!dominates_at_least_one_pred)
				return false;
		}
	}

	return true;
}

void CFGStructurizer::duplicate_node(CFGNode *node)
{
	Vector<UnorderedMap<spv::Id, spv::Id>> rewritten_ids;
	assert(node->succ.size() == 1);
	assert(node->pred.size() >= 2);
	assert(!node->dominates(node->succ.front()));

	Vector<CFGNode *> break_blocks(node->pred.size());
	rewritten_ids.resize(node->pred.size());

	auto *succ = node->succ.front();
	auto tmp_pred = node->pred;
	for (size_t i = 0, n = tmp_pred.size(); i < n; i++)
	{
		auto *pred = tmp_pred[i];
		auto &remap = rewritten_ids[i];

		// First, rewrite PHI inputs.
		// Since we only have one pred now, we can resolve PHIs directly.
		auto *block = pool.create_node();
		block->name = node->name + ".dup." + pred->name;
		block->ir.terminator.type = Terminator::Type::Branch;
		block->ir.terminator.direct_block = succ;
		block->immediate_post_dominator = succ;
		block->immediate_dominator = pred;
		pred->retarget_branch(node, block);
		block->add_branch(succ);

		for (auto &phi : node->ir.phi)
		{
			auto itr = find_incoming_value(pred, phi.incoming);
			assert(itr != phi.incoming.end());
			remap[phi.id] = itr->id;
		}

		UnorderedSet<spv::Id> remove_decoration_ids;

		for (auto *op : node->ir.operations)
		{
			auto *dup_op = duplicate_op(op, remap);
			bool nocontract = module.get_builder().hasDecoration(op->id, spv::DecorationNoContraction);
			if (nocontract)
			{
				remove_decoration_ids.insert(op->id);
				module.get_builder().addDecoration(dup_op->id, spv::DecorationNoContraction);
			}
			block->ir.operations.push_back(dup_op);
		}

		module.get_builder().removeDecorations(remove_decoration_ids);
		break_blocks[i] = block;
	}

	assert(node->pred.empty());

	// Finally, look at succ. If it takes PHI inputs from node, we'll have to rewrite the PHIs.
	// We know that node does not dominate succ,
	// so succ cannot use any SSA variables node generated directly
	// without using PHI nodes.

	// We might have placed ladders in between so that we need to fixup PHI later than just plain succ.
	// Chase down the chain and replace all PHIs.

	// First, collect all the succs that we are supposed to examine.
	// The list should also include succ_back_edge because it is not in the succ chain after recompute_cfg.
	Vector<CFGNode *> succs;
	while (succ)
	{
		if (succ->succ_back_edge)
			succs.push_back(succ->succ_back_edge);
		succs.push_back(succ);
		if (succ->succ.size() == 1)
			succ = succ->succ.front();
		else
			succ = nullptr;
	}

	for (auto *succ : succs)
	{
		bool done = false;
		for (auto &phi : succ->ir.phi)
		{
			// Find incoming ID from the block we're splitting up.
			auto incoming_itr = std::find_if(phi.incoming.begin(), phi.incoming.end(),
			                                 [&](const IncomingValue &incoming) { return incoming.block == node; });

			if (incoming_itr != phi.incoming.end())
			{
				spv::Id incoming_from_node = incoming_itr->id;
				phi.incoming.erase(incoming_itr);

				for (size_t i = 0, n = tmp_pred.size(); i < n; i++)
				{
					auto &remap = rewritten_ids[i];
					phi.incoming.push_back({ break_blocks[i],
					                         get_remapped_id_for_duplicated_block(incoming_from_node, remap) });
				}

				// We've found the block we wanted to rewrite, terminate loop now.
				done = true;
			}
		}

		if (done)
			break;
	}
}

void CFGStructurizer::duplicate_impossible_merge_constructs()
{
	Vector<CFGNode *> duplicate_queue;
	for (size_t i = forward_post_visit_order.size(); i; i--)
	{
		auto *node = forward_post_visit_order[i - 1];

		// Never duplicate back-edges.
		if (node->succ_back_edge)
			continue;

		// Check for breaking merge blocks which were not considered degenerate.
		// This can happen if we actually have code in the breaking construct ... (scary!)
		// We'll have to split this block somehow.
		// If the candidate has control dependent effects like barriers and such,
		// this will likely break completely,
		// but I don't see how that would work on native drivers either ...

		// WARNING: This check is EXTREMELY sensitive and microscopic changes to the implementation
		// will dramatically affect codegen.
		bool breaking = merge_candidate_is_on_breaking_path(node);
		if (breaking && !node->ir.operations.empty() && !block_is_control_dependent(node))
			duplicate_queue.push_back(node);
	}

	if (duplicate_queue.empty())
		return;

	for (auto *node : duplicate_queue)
	{
		if (!can_duplicate_phis(node))
		{
			// A block could be subtly load bearing, in that if we split the node, it becomes impossible to resolve
			// PHIs and we hit assertions in duplicate_node().
			// This means the block is probably load bearing after all, and we should not split it.
			// Normally, we only want to break up blocks which have fairly trivial PHI resolves.
			LOGW("Was asked to duplicate node %s, but cannot split phis without crashing ...\n", node->name.c_str());
			continue;
		}

		duplicate_node(node);
	}

	recompute_cfg();
}

bool CFGStructurizer::ladder_chain_has_phi_dependencies(const CFGNode *succ, const CFGNode *node)
{
	while (succ)
	{
		for (auto &phi : succ->ir.phi)
			for (auto &incoming : phi.incoming)
				if (incoming.block == node)
					return true;

		if (succ->succ.size() == 1)
			succ = succ->succ.front();
		else
			succ = nullptr;
	}

	return false;
}

void CFGStructurizer::eliminate_degenerate_blocks()
{
	// After we create ladder blocks, we will likely end up with a lot of blocks which don't do much.
	// We might also have created merge scenarios which should *not* merge, i.e. cleanup_breaking_phi_constructs(),
	// except we caused it ourselves.
	// Eliminate bottom-up. First eliminate B, in A -> B -> C, where B contributes nothing.
	bool did_work = false;
	for (auto *node : forward_post_visit_order)
	{
		if (node->ir.operations.empty() && node->ir.phi.empty() &&
		    !node->pred_back_edge && !node->succ_back_edge && !node->is_pseudo_back_edge &&
		    node->succ.size() == 1 && node->ir.terminator.type == Terminator::Type::Branch &&
		    node->merge == MergeType::None &&
		    // Loop merge targets are sacred, and must not be removed.
		    structured_loop_merge_targets.count(node) == 0 &&
		    !ladder_chain_has_phi_dependencies(node->succ.front(), node))
		{
			auto check_is_load_bearing_continue_succ = [node](const CFGNode *n) {
				if (!n->succ_back_edge)
					return false;

				// If we eliminate the block, we want the succ to post-dominate the header,
				// so it can be considered a merge block.
				// Similarly, we want the header to dominate the succ.
				if (!node->succ.front()->post_dominates(n->succ_back_edge))
					return true;
				if (!n->succ_back_edge->dominates(node->succ.front()))
					return true;

				// No point in eliminating since we're inside the construct.
				if (n->dominates(node))
					return true;

				return false;
			};

			// If any pred is a continue block, this block is also load-bearing, since it can be used as a merge block.
			// Even if a continue block branches to us, it may be a fake load bearing block.
			// If the succ of node post-dominates the entire loop construct, we can eliminate the block safely
			// since we're not taking away a nice merge target.
			if (std::find_if(node->pred.begin(), node->pred.end(), check_is_load_bearing_continue_succ) !=
			    node->pred.end())
				continue;

			// We might be a viable merge target for an infinite loop. If we only have one pred, we're probably not
			// a painful break merge. Removing this block shouldn't be problematic for correctness, but removing
			// a block only to add back a ladder is a little silly.
			if (node->pred.size() == 1 && node->pred.front()->pred_back_edge &&
			    node->pred.front()->pred_back_edge->succ.empty())
				continue;

			// If any succ is a continue block, this block is also load-bearing, since it can be used as a merge block
			// (merge-to-continue ladder).
			if (std::find_if(node->succ.begin(), node->succ.end(),
			                 [](const CFGNode *n) { return n->succ_back_edge != nullptr; }) != node->succ.end())
			{
				continue;
			}

			auto *succ = node->succ.front();
			if (node->pred.size() == 1 && node->post_dominates(node->pred.front()))
			{
				// Trivial case.
				did_work = true;
				auto *pred = node->pred.front();
				pred->retarget_branch(node, succ);
				pred->dominance_frontier.clear();

				// Propagates any idom information up to pred if pred dominates succ.
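				// (Illustrative: for A -> B -> C where B is an empty ladder block with A
				// as its only pred, A is retargeted straight to C and only the dominance
				// frontiers of A and C need refreshing.)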
				recompute_dominance_frontier(succ);
				recompute_dominance_frontier(pred);
			}
			else if (merge_candidate_is_inside_continue_construct(node) || merge_candidate_is_on_breaking_path(node))
			{
				// If we have two or more preds, we have to be really careful.
				// If this node is on a breaking path, without being important for merging control flow,
				// it is fine to eliminate the block.
				did_work = true;

				auto tmp_pred = node->pred;
				for (auto *pred : tmp_pred)
					pred->retarget_branch_with_intermediate_node(node, node->succ.front());

				// Iteratively, we need to recompute the dominance frontier for all preds.
				// When we eliminate nodes like this, we might cause the pred blocks to become degenerate in
				// future iterations in this loop.
				std::sort(tmp_pred.begin(), tmp_pred.end(), [](const CFGNode *a, const CFGNode *b) {
					return a->forward_post_visit_order < b->forward_post_visit_order;
				});

				// Need to compute dominance frontiers from inside out.
				for (auto *pred : tmp_pred)
				{
					pred->dominance_frontier.clear();
					recompute_dominance_frontier(pred);
				}
			}
		}
	}

	if (did_work)
		recompute_cfg();
}

void CFGStructurizer::prune_dead_preds()
{
	// We do not want to see unreachable preds.
	// Having a pred means we need to map it to an incoming value when dealing with PHI.
	for (auto *node : forward_post_visit_order)
	{
		auto itr = std::remove_if(node->pred.begin(), node->pred.end(),
		                          [&](const CFGNode *node) { return reachable_nodes.count(node) == 0; });
		node->pred.erase(itr, node->pred.end());
	}
}

static void rewrite_consumed_ids(IRBlock &ir, spv::Id from, spv::Id to)
{
	for (auto *op : ir.operations)
	{
		for (unsigned i = 0; i < op->num_arguments; i++)
		{
			if ((op->literal_mask & (1u << i)) == 0)
				if (op->arguments[i] == from)
					op->arguments[i] = to;
		}
	}

	if (ir.terminator.conditional_id == from)
		ir.terminator.conditional_id = to;
	if (ir.terminator.return_value == from)
		ir.terminator.return_value = to;
}

void CFGStructurizer::fixup_loop_header_undef_phis()
{
	auto &builder = module.get_builder();
	recompute_cfg();

	// If the incoming value to the loop is undef, something is deeply wrong.
	// This is almost a guarantee that we will consume the value as undef, causing breakage in the wild.
	// Observed in Dune.
	for (auto *node : forward_post_visit_order)
	{
		if (!node->pred_back_edge)
			continue;

		for (auto &phi : node->ir.phi)
			for (auto &incoming : phi.incoming)
				if (incoming.block != node && incoming.block->dominates(node))
					if (const auto *inst = builder.getInstruction(incoming.id))
						if (inst->getOpCode() == spv::OpUndef)
							incoming.id = builder.makeNullConstant(phi.type_id);
	}
}

static bool type_class_is_opaque(spv::Op type_op)
{
	return type_op == spv::OpTypeImage || type_op == spv::OpTypeSampler ||
	       type_op == spv::OpTypeAccelerationStructureKHR;
}

void CFGStructurizer::fixup_broken_value_dominance()
{
	struct Origin
	{
		CFGNode *node;
		spv::Id type_id;
		const Operation *rematerialize_op;
	};
	UnorderedMap<spv::Id, Origin> origin;
	UnorderedMap<spv::Id, Vector<CFGNode *>> id_to_non_local_consumers;

	// First, scan through all blocks and figure out which block creates an ID.
	for (auto *node : forward_post_visit_order)
	{
		for (auto *op : node->ir.operations)
		{
			// OpVariable is always hoisted to function entry or above.
			// It can never not have dominance relationship.
			if (op->op != spv::OpVariable && op->id)
				origin[op->id] = { node, op->type_id, op->op == spv::OpSampledImage ? op : nullptr };
		}

		for (auto &phi : node->ir.phi)
			origin[phi.id] = { node, phi.type_id, nullptr };
	}

	const auto sort_unique_node_vector = [](Vector<CFGNode *> &nodes) {
		// Fixup nodes in order.
		std::sort(nodes.begin(), nodes.end(), [](const CFGNode *a, const CFGNode *b) -> bool {
			return a->forward_post_visit_order > b->forward_post_visit_order;
		});
		nodes.erase(std::unique(nodes.begin(), nodes.end()), nodes.end());
	};

	const auto mark_node_value_access = [&](CFGNode *node, spv::Id id) {
		auto origin_itr = origin.find(id);
		if (origin_itr == origin.end())
			return;

		auto *origin_node = origin_itr->second.node;
		if (!origin_node->dominates(node) || (origin_itr->second.rematerialize_op && node != origin_node))
		{
			// We have a problem. Mark that we need to rewrite a certain variable.
			id_to_non_local_consumers[id].push_back(node);
		}
	};

	// Need value copy here since we might be updating node->ir.operations inline leading to iterator invalidation.
	Vector<Operation> variable_pointer_like_operations[2];

	// Now, scan through all blocks and figure out which values are consumed in different blocks.
	for (auto *node : forward_post_visit_order)
	{
		for (auto *op : node->ir.operations)
		{
			auto literal_mask = op->literal_mask;
			for (unsigned i = 0; i < op->num_arguments; i++)
				if (((1u << i) & literal_mask) == 0)
					mark_node_value_access(node, op->arguments[i]);

			if (op->op == spv::OpLoad && type_class_is_opaque(module.get_builder().getTypeClass(op->type_id)))
				variable_pointer_like_operations[0].push_back(*op);
			else if (op->op == spv::OpAccessChain)
				variable_pointer_like_operations[1].push_back(*op);
		}

		// Incoming PHI values are handled elsewhere by modifying the incoming block to the creating block.
		// Ignore these kinds of usage here.

		if (node->ir.terminator.conditional_id != 0)
			mark_node_value_access(node, node->ir.terminator.conditional_id);
		if (node->ir.terminator.return_value != 0)
			mark_node_value_access(node, node->ir.terminator.return_value);
	}

	// First, sink any opaque objects which are accessed in unexpected blocks after CFG rewrite.
	for (auto &rewrite_ordering : variable_pointer_like_operations)
	{
		for (auto &variable_op : rewrite_ordering)
		{
			auto itr = id_to_non_local_consumers.find(variable_op.id);
			if (itr != id_to_non_local_consumers.end())
			{
				// We will need to sink the operation.
				// Make sure all dependencies are also marked as used in potentially non-local block.

				// Sort for deterministic output.
				Vector<CFGNode *> local_consumers_sorted;
				for (auto *non_local_node : itr->second)
					local_consumers_sorted.push_back(non_local_node);
				std::sort(local_consumers_sorted.begin(), local_consumers_sorted.end(),
				          [](const CFGNode *a, const CFGNode *b) {
					          return a->forward_post_visit_order < b->forward_post_visit_order;
				          });

				auto literal_mask = variable_op.literal_mask;
				for (unsigned i = 0; i < variable_op.num_arguments; i++)
					if (((1u << i) & literal_mask) == 0)
						for (auto *non_local_node : local_consumers_sorted)
							mark_node_value_access(non_local_node, variable_op.arguments[i]);

				for (auto *non_local_node : local_consumers_sorted)
				{
					auto *sunk_chain = module.allocate_op();
					*sunk_chain = variable_op;
					sunk_chain->id = module.allocate_id();
					if (module.get_builder().hasDecoration(variable_op.id, spv::DecorationNonUniform))
						module.get_builder().addDecoration(sunk_chain->id, spv::DecorationNonUniform);

					auto &ops = non_local_node->ir.operations;
					rewrite_consumed_ids(non_local_node->ir, variable_op.id, sunk_chain->id);
					ops.insert(ops.begin(), sunk_chain);
				}
			}
		}
	}

	// Resolve these broken PHIs by using OpVariable. It is the simplest solution, and this is a very rare case to begin with.
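	// Sketch of that rewrite (illustrative SPIR-V, not emitted verbatim):
	//   A: %v = ...              ; definition which no longer dominates a use in B
	//   A: OpStore %var %v       ; %var is a Function-storage OpVariable
	//   B: %v2 = OpLoad %var     ; uses of %v in B are rewritten to %v2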
	struct Rewrite
	{
		spv::Id id;
		const Vector<CFGNode *> *consumers;
	};
	Vector<Rewrite> rewrites;
	rewrites.reserve(id_to_non_local_consumers.size());

	for (auto &pair : id_to_non_local_consumers)
	{
		sort_unique_node_vector(pair.second);
		rewrites.push_back({ pair.first, &pair.second });
	}

	// Ensure ordering so that output remains stable.
	std::sort(rewrites.begin(), rewrites.end(), [](const Rewrite &a, const Rewrite &b) { return a.id < b.id; });

	for (auto &rewrite : rewrites)
	{
		auto &orig = origin[rewrite.id];

		// We don't rely on VariablePointers, so if this comes up, we need to figure out something else.
		// These kinds of ops are handled specially by re-creating them as needed.
		bool rematerialized = module.get_builder().isPointerType(orig.type_id) ||
		                      type_class_is_opaque(module.get_builder().getTypeClass(orig.type_id));

		if (orig.rematerialize_op)
		{
			auto *rematerialize_op = module.allocate_op();
			*rematerialize_op = *orig.rematerialize_op;
			rematerialize_op->id = module.allocate_id();
			if (module.get_builder().hasDecoration(orig.rematerialize_op->id, spv::DecorationNonUniform))
				module.get_builder().addDecoration(rematerialize_op->id, spv::DecorationNonUniform);

			for (auto *consumer : *rewrite.consumers)
			{
				rewrite_consumed_ids(consumer->ir, rewrite.id, rematerialize_op->id);
				consumer->ir.operations.insert(consumer->ir.operations.begin(), rematerialize_op);
			}
		}
		else if (!rematerialized)
		{
			// Invalid access chains are resolved above. We end up rewriting any non-dominated values instead.
			spv::Id alloca_var_id = module.create_variable(spv::StorageClassFunction, orig.type_id);

			auto *store_op = module.allocate_op(spv::OpStore);
			store_op->add_id(alloca_var_id);
			store_op->add_id(rewrite.id);
			orig.node->ir.operations.push_back(store_op);

			// For every non-local node which consumes ID, we load from the alloca'd variable instead.
			// Rewrite all ID references to point to the loaded value.
			for (auto *consumer : *rewrite.consumers)
			{
				spv::Id loaded_id = module.allocate_id();
				auto *load_op = module.allocate_op(spv::OpLoad, loaded_id, orig.type_id);
				load_op->add_id(alloca_var_id);
				rewrite_consumed_ids(consumer->ir, rewrite.id, loaded_id);
				consumer->ir.operations.insert(consumer->ir.operations.begin(), load_op);
			}
		}
	}
}

void CFGStructurizer::insert_phi()
{
	// If we inserted dummy branches from back-edge to rewrite infinite loops, we must prune these branches
	// now, so we don't end up creating the wrong number of PHI incoming values.
	// We don't have to recompute the CFG since we don't really care about post-visit orders at this stage.
	for (auto *node : forward_post_visit_order)
	{
		if (node->pred_back_edge &&
		    node->pred_back_edge->ir.terminator.type == Terminator::Type::Branch &&
		    node->pred_back_edge->succ_back_edge == node->pred_back_edge->ir.terminator.direct_block &&
		    node->pred_back_edge->succ.size() == 1)
		{
			auto *back_edge = node->pred_back_edge;
			auto *succ = back_edge->succ.front();
			back_edge->succ.clear();

			auto itr = std::find(succ->pred.begin(), succ->pred.end(), back_edge);
			assert(itr != succ->pred.end());
			succ->pred.erase(itr);
			succ->recompute_immediate_dominator();
		}
	}

	prune_dead_preds();

	// It is possible that an SSA value was created in a block, and consumed in another.
	// With CFG rewriting branches, it is possible that dominance relationship no longer holds
	// and we must insert new dummy IDs to resolve this.
	fixup_broken_value_dominance();

	// Build a map of value ID -> creating block.
	// This allows us to detect if a value is consumed in a situation where the declaration does not dominate use.
	// This can happen when introducing ladder blocks or similar.
	for (auto *node : forward_post_visit_order)
	{
		unsigned phi_index = 0;
		for (auto &phi : node->ir.phi)
		{
			phi_nodes.push_back({ node, phi_index });
			if (phi.id)
				value_id_to_block[phi.id] = node;
			phi_index++;
		}

		for (auto *op : node->ir.operations)
			if (op->id)
				value_id_to_block[op->id] = node;
	}

	// Resolve phi-nodes top-down since PHI nodes may depend on other PHI nodes.
	std::sort(phi_nodes.begin(), phi_nodes.end(), [](const PHINode &a, const PHINode &b) {
		return a.block->forward_post_visit_order > b.block->forward_post_visit_order;
	});

	for (auto &phi_node : phi_nodes)
	{
		fixup_phi(phi_node);
		insert_phi(phi_node);
	}
}

Vector<IncomingValue>::const_iterator CFGStructurizer::find_incoming_value(
    const CFGNode *frontier_pred, const Vector<IncomingValue> &incoming)
{
	// Find the incoming block which dominates frontier_pred and has the lowest post visit order.
	// There are cases where two or more blocks dominate, but we want the most immediate dominator.
	auto candidate = incoming.end();

	for (auto itr = incoming.begin(); itr != incoming.end(); ++itr)
	{
		auto *block = itr->block;
		if (block->dominates(frontier_pred))
		{
			if (candidate == incoming.end() ||
			    (block->forward_post_visit_order < candidate->block->forward_post_visit_order))
				candidate = itr;
		}
	}

	return candidate;
}

static IncomingValue *phi_incoming_blocks_find_block(Vector<IncomingValue> &incomings, const CFGNode *block)
{
	for (auto &incoming : incomings)
		if (incoming.block == block)
			return &incoming;
	return nullptr;
}

static bool id_is_generated_by_block(const CFGNode *block, spv::Id id)
{
	for (const auto *op : block->ir.operations)
		if (op->id == id)
			return true;

	for (const auto &phi : block->ir.phi)
		if (phi.id == id)
			return true;

	return false;
}

static void retarget_phi_incoming_block(PHI &phi, CFGNode *from, CFGNode *to)
{
	auto *value = phi_incoming_blocks_find_block(phi.incoming, from);
	if (value)
		value->block = to;
}

void CFGStructurizer::fixup_phi(PHINode &node)
{
	// We want to move any incoming block to where the ID was created.
	// This avoids some problematic cases of crossing edges when using ladders.
	auto &phi = node.block->ir.phi[node.phi_index];
	auto &incomings = phi.incoming;

	for (auto &incoming : incomings)
	{
		auto itr = value_id_to_block.find(incoming.id);
		if (itr == end(value_id_to_block))
		{
			// This is a global.
			continue;
		}

		auto *source_block = itr->second;

		// Only hoist PHI inputs if there used to be a dominance relationship in the original CFG,
		// but there no longer is.
		if (!source_block->dominates(incoming.block))
		{
			bool hoist_incoming = true;

			if (phi_incoming_blocks_find_block(incomings, source_block) != nullptr)
			{
				// Sanity check. This would create ambiguity.
				hoist_incoming = false;
			}

			// Don't hoist PHI inputs across the loop header boundary.
			if (incoming.block->succ_back_edge && query_reachability(*source_block, *incoming.block->succ_back_edge))
			{
				// If this happens somehow, we have a problem. It's a bit unclear how this is supposed to work.
				// It's possible we'd need to synthesize a fake input to back-edge which can be resolved
				// in a code path that does dominate the loop ...
				LOGW("Incoming value to back edge does not dominate loop header.\n");
				hoist_incoming = false;
			}

			if (hoist_incoming)
			{
#ifdef PHI_DEBUG
				LOGI("For node %s, move incoming node %s to %s.\n", node.block->name.c_str(),
				     incoming.block->name.c_str(), itr->second->name.c_str());
#endif
				incoming.block = itr->second;
			}
			else
			{
				// We cannot hoist, so need to use dummy OpVariable instead.
				spv::Id alloca_var_id = module.create_variable(spv::StorageClassFunction, phi.type_id, "phi_fixup");

				auto *store_op = module.allocate_op(spv::OpStore);
				store_op->add_id(alloca_var_id);
				store_op->add_id(incoming.id);
				itr->second->ir.operations.push_back(store_op);

				spv::Id loaded_id = module.allocate_id();
				auto *load_op = module.allocate_op(spv::OpLoad, loaded_id, phi.type_id);
				load_op->add_id(alloca_var_id);
				incoming.block->ir.operations.push_back(load_op);
				incoming.id = loaded_id;
			}

			validate_phi(node.block->ir.phi[node.phi_index]);
		}
	}
}

bool CFGStructurizer::can_complete_phi_insertion(const PHI &phi, const CFGNode *block)
{
	// If all incoming values have at least one pred block they dominate, we can merge the final PHI.
	auto &incoming_values = phi.incoming;
	for (auto &incoming : incoming_values)
	{
		auto itr = std::find_if(block->pred.begin(), block->pred.end(),
		                        [&](const CFGNode *n) { return incoming.block->dominates(n); });

		if (itr == block->pred.end() && (!block->pred_back_edge || !incoming.block->dominates(block->pred_back_edge)))
		{
			return false;
		}
	}

	return true;
}

bool CFGStructurizer::query_reachability_through_back_edges(const CFGNode &from, const CFGNode &to) const
{
	if (to.dominates(&from))
	{
		// If we're dominated by the end node, the only way we can reach it is through a back edge.
		return to.pred_back_edge && query_reachability(from, *to.pred_back_edge);
	}
	else
		return query_reachability(from, to);
}

bool CFGStructurizer::query_reachability_split_loop_header(const CFGNode &from, const CFGNode &to,
                                                           const CFGNode &end_node) const
{
	// A special query where from and to must lie on the same side of a loop header to be considered reachable.
	if (!end_node.pred_back_edge)
		return query_reachability(from, to);

	bool from_reaches_header = query_reachability(from, end_node);
	bool to_reaches_header = query_reachability(to, end_node);
	if (from_reaches_header != to_reaches_header)
		return false;

	return query_reachability(from, to);
}

bool CFGStructurizer::phi_frontier_makes_forward_progress(const PHI &phi, const CFGNode *frontier,
                                                          const CFGNode *end_node) const
{
	// Not all PHI frontiers are nodes we need to care about.
	// There are two conditions we must meet to disregard a placement:
	// - We do not remove any inputs as a result.
	// - The frontier can reach another incoming value.
	// In this situation, a frontier is completely meaningless.

	auto &incoming = phi.incoming;
	for (auto &incoming_value : incoming)
	{
		auto *incoming_block = incoming_value.block;

		// We will remove an input, this is forward progress.
		// Avoid checking the edge case where frontier candidate == incoming block.
		// Removing an input only to place a new frontier there is nonsensical.
		if (frontier != incoming_block &&
		    !exists_path_in_cfg_without_intermediate_node(incoming_block, end_node, frontier))
			return true;
	}

	// Nothing is removed as a result, so check if the frontier can reach another incoming value.
	// If end_node is a loop header, make sure we only consider a node visible if both are on the correct side of the
	// loop header.
	for (auto &incoming_value : incoming)
		if (query_reachability_split_loop_header(*frontier, *incoming_value.block, *end_node))
			return false;

	// Assume we make forward progress. Either way, we will never look at a frontier twice,
	// so this should be safe. The only real risk is that we add some redundant PHI nodes.
	return true;
}

void CFGStructurizer::insert_phi(PHINode &node)
{
	// We start off with N values defined in N blocks.
	// These N blocks *used* to branch to the PHI node, but due to our structurizer,
	// there might not be branch targets here anymore; the primary example here is ladders.
	// In order to fix this we need to follow control flow from these values and insert phi nodes as necessary to link up
	// a set of values where dominance frontiers are shared.

#ifdef PHI_DEBUG
	LOGI("\n=== INSERT PHI FOR %s ===\n", node.block->name.c_str());
#endif

	auto &phi = node.block->ir.phi[node.phi_index];
	auto &incoming_values = phi.incoming;
	UnorderedSet<const CFGNode *> placed_frontiers;

	for (;;)
	{
#ifdef PHI_DEBUG
		LOGI("\n=== PHI iteration ===\n");
		for (auto &incoming : incoming_values)
			LOGI("  Incoming value from %s\n", incoming.block->name.c_str());
#endif

		// Inside the CFG subset, find a dominance frontier where we merge PHIs this iteration.
		CFGNode *frontier = node.block;

		if (!can_complete_phi_insertion(phi, node.block))
		{
			frontier = nullptr;

			// We need some intermediate merge, so find a frontier node to work on.
			for (auto &incoming : incoming_values)
			{
				for (auto *candidate_frontier : incoming.block->dominance_frontier)
				{
					if (placed_frontiers.count(candidate_frontier))
						continue;

					if (!phi_frontier_makes_forward_progress(phi, candidate_frontier, node.block))
					{
						// Make sure we don't redundantly test this again.
						placed_frontiers.insert(candidate_frontier);
						continue;
					}

					// Only consider a frontier if we can reach node.block or its back edge from it.
					if (query_reachability_through_back_edges(*candidate_frontier, *node.block))
					{
						if (frontier == nullptr ||
						    candidate_frontier->forward_post_visit_order > frontier->forward_post_visit_order)
						{
							// Pick the earliest frontier in the CFG.
							// We want to merge top to bottom.
							frontier = candidate_frontier;
						}
					}
				}
			}

			if (frontier)
				placed_frontiers.insert(frontier);
		}

		assert(frontier);

		if (frontier == node.block)
		{
			if (frontier->pred.size() == 1 && !frontier->pred_back_edge)
			{
				// The PHI node has already been merged.
				// This can happen if a ladder pred block merged all inputs, and we would
				// end up with a single-pred PHI, which makes no sense (even if it should work).
				// Just copy the ID for the frontier node which made the final merge.
				auto itr = find_incoming_value(frontier->pred.front(), incoming_values);
				assert(itr != incoming_values.end());
				auto *op = module.allocate_op(spv::OpCopyObject, phi.id, phi.type_id);
				op->add_id(itr->id);
				frontier->pred.front()->ir.operations.push_back(op);

				// Ignore this one when emitting PHIs later.
				phi.id = 0;
			}
			else
			{
				Vector<IncomingValue> final_incoming;

				// Final merge.
				for (auto *input : frontier->pred)
				{
					auto itr = find_incoming_value(input, incoming_values);
					IncomingValue value = {};
					if (itr != incoming_values.end())
						value.id = itr->id;
					else
						value.id = module.get_builder().createUndefined(phi.type_id);
					value.block = input;
					final_incoming.push_back(value);
				}

				if (frontier->pred_back_edge)
				{
					auto itr = find_incoming_value(frontier->pred_back_edge, incoming_values);
					IncomingValue value = {};
					if (itr != incoming_values.end())
						value.id = itr->id;
					else
						value.id = module.get_builder().createUndefined(phi.type_id);
					value.block = frontier->pred_back_edge;
					final_incoming.push_back(value);
				}

				incoming_values = std::move(final_incoming);
			}

			return;
		}

		// A candidate dominance frontier is a place where we might want to place a PHI node in order to merge values.
		// For a successful iteration, we need to find at least one candidate where we can merge PHI.
#ifdef PHI_DEBUG
		LOGI("Testing dominance frontier %s ...\n", frontier->name.c_str());
#endif

		// Remove old inputs.
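		// (Illustrative: if the surviving incoming values live in B1 and B2 and both
		// branch into a shared dominance frontier F, the frontier_phi below merges
		// them in F, and F then stands in as a single incoming value for node.block.)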
		PHI frontier_phi;
		frontier_phi.id = module.allocate_id();
		frontier_phi.type_id = phi.type_id;
		frontier_phi.relaxed = phi.relaxed;
		module.get_builder().addName(frontier_phi.id, (String("frontier_phi_") + frontier->name).c_str());

		assert(!frontier->pred_back_edge);
		for (auto *input : frontier->pred)
		{
			auto itr = find_incoming_value(input, incoming_values);
			if (itr != incoming_values.end())
			{
#ifdef PHI_DEBUG
				auto *incoming_block = itr->block;
				LOGI("  ... found incoming block %s for input %s.\n", incoming_block->name.c_str(),
				     input->name.c_str());
				LOGI("  ... For pred %s (%p), found incoming value from %s (%p)\n", input->name.c_str(),
				     static_cast<const void *>(input), incoming_block->name.c_str(),
				     static_cast<const void *>(incoming_block));
#endif
				IncomingValue value = {};
				value.id = itr->id;
				value.block = input;
				frontier_phi.incoming.push_back(value);
			}
			else
			{
#ifdef PHI_DEBUG
				LOGI("  ... creating undefined input for %s\n", input->name.c_str());
#endif
				// If there is no incoming value, we need to hallucinate an undefined value.
				IncomingValue value = {};
				value.id = module.get_builder().createUndefined(phi.type_id);
				value.block = input;
				frontier_phi.incoming.push_back(value);
			}
		}

		// Do we remove the incoming value now or not?
		// If all paths from the incoming value must go through frontier, we can remove it,
		// otherwise, we might still need to use the incoming value somewhere else.
		size_t num_alive_incoming_values = incoming_values.size();
		for (size_t i = 0; i < num_alive_incoming_values; )
		{
			auto *incoming_block = incoming_values[i].block;

			// This is fundamentally ambiguous and should never happen.
			if (incoming_block == frontier)
				LOGE("Invalid PHI collapse detected!\n");
			assert(incoming_block != frontier);

			if (!exists_path_in_cfg_without_intermediate_node(incoming_block, node.block, frontier))
			{
#ifdef PHI_DEBUG
				LOGI("  ... removing input in %s\n", incoming_block->name.c_str());
#endif
				if (i != num_alive_incoming_values - 1)
					std::swap(incoming_values[num_alive_incoming_values - 1], incoming_values[i]);
				num_alive_incoming_values--;
			}
			else
			{
#ifdef PHI_DEBUG
				LOGI("  ... keeping input in %s\n", incoming_block->name.c_str());
#endif
				i++;
			}
		}

		// Need to clean up exhausted incoming values after the loop,
		// since an incoming value can be used multiple times before a frontier PHI is resolved.
		incoming_values.erase(incoming_values.begin() + num_alive_incoming_values, incoming_values.end());

		IncomingValue *dominated_incoming = nullptr;
		for (auto &incoming : incoming_values)
		{
			if (frontier->dominates(incoming.block) &&
			    !exists_path_in_cfg_without_intermediate_node(frontier, node.block, incoming.block))
			{
				// There should be only one block the frontier can dominate.
				// The candidate block must also post-dominate the frontier on the CFG subset which terminates
				// at node.block, otherwise we will get a proper merge later anyways.
				assert(!dominated_incoming);
				dominated_incoming = &incoming;
			}
		}

		if (dominated_incoming)
		{
			// If our frontier dominates another incoming block, we need to merge two incoming values
			// using an auxiliary phi node as well as an OpSelect to resolve two conflicting values into one.

			// For every pred edge of the frontier where the pred did not dominate, we are now suddenly dominating.
			// If we came from such a block,
			// we should replace the incoming value of dominated_incoming rather than adding a new incoming value.
			PHI merge_phi = {};
			merge_phi.relaxed = phi.relaxed;

			// Here we need to figure out if we have a cross branch which functions as a ladder.
			// If we have such a special edge, the PHI value we find here will override any other value on this path.
			// However, if we only have expected branches, there is nothing to override, and any PHI values
			// we created along this path turned out to be irrelevant after all.
			unsigned normal_branch_count = 0;

			for (auto *input : frontier->pred)
			{
				IncomingValue value = {};
				auto itr = find_incoming_value(input, incoming_values);

				if (itr != incoming_values.end())
				{
					// If the input does not dominate the frontier, this might be a case of cross-edge PHI merge.
					// However, if we still have an incoming value which dominates the input block, ignore.
					// This is considered a normal path and we will merge the actual result in a later iteration, because
					// the frontier is not a post-dominator of the input value.
					bool input_is_normal_edge = true;

					if (!input->dominates(frontier))
					{
						input_is_normal_edge = false;
						for (auto &incoming : incoming_values)
						{
							if (incoming.block->dominates(input))
							{
								input_is_normal_edge = true;
								break;
							}
						}
					}

					if (input_is_normal_edge)
						normal_branch_count++;
					value.id = module.get_builder().makeBoolConstant(input_is_normal_edge);
				}
				else
				{
					// The input is undefined, so we don't really care. Just treat this as a normal edge.
					normal_branch_count++;
					value.id = module.get_builder().makeBoolConstant(true);
				}

				value.block = input;
				merge_phi.incoming.push_back(value);
			}

			if (normal_branch_count != frontier->pred.size())
			{
				merge_phi.id = module.allocate_id();
				merge_phi.type_id = module.get_builder().makeBoolType();

				Operation *op = module.allocate_op(spv::OpSelect, module.allocate_id(), phi.type_id);
				op->add_id(merge_phi.id);
				op->add_id(dominated_incoming->id);
				op->add_id(frontier_phi.id);
				dominated_incoming->block->ir.operations.push_back(op);
				dominated_incoming->id = op->id;

				module.get_builder().addName(merge_phi.id, (String("merged_phi_") + dominated_incoming->block->name).c_str());
				frontier->ir.phi.push_back(std::move(merge_phi));
			}
		}
		else
		{
			// Replace with merged value.
			IncomingValue new_incoming = {};
			new_incoming.id = frontier_phi.id;
			new_incoming.block = frontier;
			incoming_values.push_back(new_incoming);
		}

#ifdef PHI_DEBUG
		LOGI("=========================\n");
#endif

		frontier->ir.phi.push_back(std::move(frontier_phi));
	}
}

void CFGStructurizer::compute_dominance_frontier()
{
	for (auto *node : forward_post_visit_order)
		node->dominance_frontier.clear();
	for (auto *node : forward_post_visit_order)
		recompute_dominance_frontier(node);
}

void CFGStructurizer::compute_post_dominance_frontier()
{
	for (auto *node : backward_post_visit_order)
		node->post_dominance_frontier.clear();
	for (auto *node : backward_post_visit_order)
		recompute_post_dominance_frontier(node);
}

void CFGStructurizer::build_immediate_dominators()
{
	for (auto i = forward_post_visit_order.size(); i; i--)
	{
		auto *block = forward_post_visit_order[i - 1];
		block->recompute_immediate_dominator();
	}
}

void CFGStructurizer::build_immediate_post_dominators()
{
	for (auto i = backward_post_visit_order.size(); i; i--)
	{
		auto *block = backward_post_visit_order[i - 1];
		block->recompute_immediate_post_dominator();
	}
}

void CFGStructurizer::reset_traversal()
{
	reachable_nodes.clear();
	forward_post_visit_order.clear();
	backward_post_visit_order.clear();
	pool.for_each_node([](CFGNode &node) {
		node.visited = false;
		node.backward_visited = false;
		node.traversing = false;
		node.immediate_dominator = nullptr;
		node.immediate_post_dominator = nullptr;
		node.fake_pred.clear();
		node.fake_succ.clear();
		node.headers.clear();

		if (!node.freeze_structured_analysis)
		{
			node.merge = MergeType::None;
			node.loop_merge_block = nullptr;
			node.loop_ladder_block = nullptr;
			node.selection_merge_block = nullptr;
		}

		if (node.succ_back_edge)
			node.succ.push_back(node.succ_back_edge);
		if (node.pred_back_edge)
			node.pred.push_back(node.pred_back_edge);
		node.succ_back_edge = nullptr;
		node.pred_back_edge = nullptr;
	});
}

struct LoopBacktracer
{
	void trace_to_parent(CFGNode *header, CFGNode *block);
	UnorderedSet<CFGNode *> traced_blocks;
};

struct LoopMergeTracer
{
	explicit LoopMergeTracer(const LoopBacktracer &backtracer_)
	    : backtracer(backtracer_)
	{
	}

	void trace_from_parent(CFGNode *header);
	const LoopBacktracer &backtracer;
	Vector<CFGNode *> loop_exits;
	UnorderedSet<CFGNode *> traced_blocks;
};

void LoopBacktracer::trace_to_parent(CFGNode *header, CFGNode *block)
{
	if (block == header)
	{
		traced_blocks.insert(block);
		return;
	}

	if (traced_blocks.count(block) == 0)
	{
		traced_blocks.insert(block);
		for (auto *p : block->pred)
			trace_to_parent(header, p);

		// A backtrace will not pick up continue blocks which only branch back to header,
		// and thus they will be considered loop exits by mistake.
		// Start traversing from the continue block to catch these nodes as well.
		// If a loop header is part of an outer loop construct, the loop body must
		// also be part of the loop construct.
		if (block->pred_back_edge)
			trace_to_parent(header, block->pred_back_edge);
	}
}

void LoopMergeTracer::trace_from_parent(CFGNode *header)
{
	if (backtracer.traced_blocks.count(header) == 0)
	{
		if (std::find(loop_exits.begin(), loop_exits.end(), header) == loop_exits.end())
			loop_exits.push_back(header);
		return;
	}

	for (auto *succ : header->succ)
	{
		if (traced_blocks.count(succ) == 0)
		{
			trace_from_parent(succ);
			traced_blocks.insert(succ);
		}
	}
}

void CFGStructurizer::backwards_visit()
{
	Vector<CFGNode *> leaf_nodes;

	// Traverse from leaf nodes, back through their preds instead.
	// Clear out some state set by forward visit earlier.
	for (auto *node : forward_post_visit_order)
	{
		node->backward_visited = false;
		node->traversing = false;

		// For loops which can only exit from their header block,
		// certain loops will be unreachable when doing a backwards traversal.
		// We'll visit them explicitly later.
		if (node->succ.empty() && !node->succ_back_edge)
			leaf_nodes.push_back(node);
	}

	for (auto *leaf : leaf_nodes)
		backwards_visit(*leaf);

	// It might be the case that some continue blocks are not reachable through backwards traversal.
	// This effectively means that our flipped CFG is not reducible, which is rather annoying.
	// To work around this, we fake some branches from the continue block out to other blocks.
	// This way, we ensure that every forward-reachable block is reachable in a backwards traversal as well.
	// The algorithm works as follows: given the innermost loop header A, a block B (A dom B) and continue block C,
	// for the successors of B, we will observe some successors which can reach C ({E}), and some successors which cannot reach C.
	// C will add fake successor edges to {E}.
	bool need_revisit = false;

	for (size_t i = forward_post_visit_order.size(); i; i--)
	{
		// Resolve outer loops before inner loops since we can have nested loops which need
		// to link into each other.
		auto *node = forward_post_visit_order[i - 1];
		if (node->pred_back_edge)
		{
			if (!node->pred_back_edge->backward_visited)
			{
				LoopBacktracer tracer;
				tracer.trace_to_parent(node, node->pred_back_edge);

				LoopMergeTracer merge_tracer(tracer);
				merge_tracer.trace_from_parent(node);

				// If we have an infinite loop, the continue block will not be reachable with backwards traversal.
				// Also, the only way to exit the loop construct could be through a single return block.
				// In this case, the return block should be moved and considered to be the merge block.
				// We add true branches from the continue block to the return block instead of fake branches.

				// Ensure stable codegen order.
				Vector<CFGNode *> exits;
				exits.reserve(merge_tracer.loop_exits.size());
				for (auto *exit_node : merge_tracer.loop_exits)
					exits.push_back(exit_node);
				std::sort(exits.begin(), exits.end(), [](const CFGNode *a, const CFGNode *b) {
					return a->forward_post_visit_order > b->forward_post_visit_order;
				});

				bool transpose_loop_exit = false;
				if (exits.size() == 1)
				{
					auto *exit_node = exits.front();
					// If this is true, we never really leave the loop, which is problematic.
					transpose_loop_exit = exit_node->dominates_all_reachable_exits();

					// Only transpose if we're the innermost header, otherwise, inner loops which try to branch
					// to the return will be considered a multi-break which is very awkward.
					if (transpose_loop_exit)
					{
						auto *innermost_header = get_innermost_loop_header_for(node, exit_node);
						transpose_loop_exit = innermost_header == node;
					}
				}

				if (transpose_loop_exit)
				{
					for (auto *f : exits)
						node->pred_back_edge->add_branch(f);
				}
				else
				{
					// Only consider exits that are themselves backwards reachable.
					// Otherwise, we'll be adding fake succs that resolve to outer infinite loops again.
					for (auto *f : exits)
						if (f->reaches_backward_visited_node())
							node->pred_back_edge->add_fake_branch(f);
				}

				if (!node->pred_back_edge->succ.empty() || !node->pred_back_edge->fake_succ.empty())
				{
					// Consider this to be backwards visited in case we have a nested inner loop
					// that needs to link up to node->pred_back_edge.
					node->pred_back_edge->backward_visited = true;
				}

				need_revisit = true;
			}
		}
	}

	if (need_revisit)
	{
		for (auto *node : forward_post_visit_order)
		{
			node->backward_visited = false;
			node->traversing = false;
			node->backward_post_visit_order = 0;
		}

		for (auto *leaf : leaf_nodes)
			backwards_visit(*leaf);
	}

	exit_block->backward_post_visit_order = backward_post_visit_order.size();
	exit_block->immediate_post_dominator = exit_block;
	exit_block->backward_visited = true;
	for (auto *leaf : leaf_nodes)
		leaf->immediate_post_dominator = exit_block;
}

void CFGStructurizer::backwards_visit(CFGNode &entry)
{
	entry.backward_visited = true;

	for (auto *pred : entry.pred)
		if (!pred->backward_visited)
			backwards_visit(*pred);

	for (auto *pred : entry.fake_pred)
		if (!pred->backward_visited)
			backwards_visit(*pred);

	entry.backward_post_visit_order = backward_post_visit_order.size();
	backward_post_visit_order.push_back(&entry);
}

void CFGStructurizer::visit_for_back_edge_analysis(CFGNode &entry)
{
	entry.visited = true;
	entry.traversing = true;
	reachable_nodes.insert(&entry);

	for (auto *succ : entry.succ)
	{
		// Reuse the existing vector to keep track of back edges.
		if (succ->traversing)
			succ->fake_pred.push_back(&entry);
		else if (!succ->visited)
			visit_for_back_edge_analysis(*succ);
	}

	entry.traversing = false;

	// After we get here, we must have observed all back edges.
	// If there is more than one back edge, merge them.
	if (entry.fake_pred.size() >= 2)
	{
		auto *new_back_edge = pool.create_node();
		new_back_edge->name = entry.name + ".back-edge-merge";
		for (auto *n : entry.fake_pred)
			n->retarget_branch_pre_traversal(&entry, new_back_edge);
		new_back_edge->succ.push_back(&entry);
		new_back_edge->ir.terminator.type = Terminator::Type::Branch;
		new_back_edge->ir.terminator.direct_block = &entry;
		new_back_edge->add_branch(&entry);
	}
}

void CFGStructurizer::visit(CFGNode &entry)
{
	entry.visited = true;
	entry.traversing = true;
	reachable_nodes.insert(&entry);

	for (auto *succ : entry.succ)
	{
		if (succ->traversing)
		{
			// For now, only support one back edge.
			// DXIL seems to obey this.
			assert(!entry.succ_back_edge || entry.succ_back_edge == succ);
			entry.succ_back_edge = succ;

			// For now, only support one back edge.
			// DXIL seems to obey this.
			assert(!succ->pred_back_edge || succ->pred_back_edge == &entry);
			succ->pred_back_edge = &entry;
		}
		else if (!succ->visited)
			visit(*succ);
	}

	// Any back edges need to be handled specifically, only keep forward edges in succ/pred lists.
	// This avoids any infinite loop scenarios and needing to special case a lot of checks.
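	// E.g., for a simple rotated loop (hypothetical): H -> B, B -> H (back edge),
	// H -> M. After this pass, succ/pred only carry the forward edges H -> B and
	// H -> M, while B.succ_back_edge == H and H.pred_back_edge == B record the cycle.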
	if (entry.succ_back_edge)
	{
		auto itr = std::find(entry.succ.begin(), entry.succ.end(), entry.succ_back_edge);
		if (itr != entry.succ.end())
			entry.succ.erase(itr);
	}

	if (entry.pred_back_edge)
	{
		auto itr = std::find(entry.pred.begin(), entry.pred.end(), entry.pred_back_edge);
		if (itr != entry.pred.end())
			entry.pred.erase(itr);
	}

	entry.traversing = false;
	entry.forward_post_visit_order = forward_post_visit_order.size();
	forward_post_visit_order.push_back(&entry);
}

void CFGStructurizer::merge_to_succ(CFGNode *node, unsigned index)
{
	node->succ[index]->headers.push_back(node);
	node->selection_merge_block = node->succ[index];
	node->merge = MergeType::Selection;
	//LOGI("Fixup selection merge %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
}

void CFGStructurizer::isolate_structured(UnorderedSet<CFGNode *> &nodes, const CFGNode *header, const CFGNode *merge)
{
	for (auto *pred : merge->pred)
	{
		if (pred != header && nodes.count(pred) == 0)
		{
			nodes.insert(pred);
			isolate_structured(nodes, header, pred);
		}
	}
}

Vector<CFGNode *> CFGStructurizer::isolate_structured_sorted(const CFGNode *header, const CFGNode *merge)
{
	UnorderedSet<CFGNode *> nodes;
	isolate_structured(nodes, header, merge);

	Vector<CFGNode *> sorted;
	sorted.reserve(nodes.size());
	for (auto *node : nodes)
		sorted.push_back(node);

	std::sort(sorted.begin(), sorted.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order > b->forward_post_visit_order;
	});
	return sorted;
}

bool CFGStructurizer::block_is_load_bearing(const CFGNode *node, const CFGNode *merge) const
{
	while (merge->succ.size() == 1)
	{
		// If we're going to eliminate a block due to impossible merge,
		// we should look ahead since we might get a false positive.
		bool breaking = merge_candidate_is_on_breaking_path(merge);
		if (breaking && !merge->ir.operations.empty() && !block_is_control_dependent(merge))
			merge = merge->succ.front();
		else
			break;
	}

	return node->pred.size() >= 2 && !exists_path_in_cfg_without_intermediate_node(node->immediate_dominator, merge, node);
}

bool CFGStructurizer::control_flow_is_escaping_from_loop(const CFGNode *node, const CFGNode *merge) const
{
	bool escaping_path = false;

	if (node == merge)
		return escaping_path;

	assert(merge->post_dominates(node));

	// First, test the loop scenario.
	// If we're inside a loop, we're a break construct if we can prove that:
	// - node has a loop header which dominates it.
	// - node cannot reach the continue block.
	// - Continue block cannot reach node.
	// - All post-domination frontiers can reach the continue block, meaning that at some point control flow
	//   decided to break out of the loop construct.
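	// As an illustration (hypothetical blocks): loop H { B -> { C, node } } with
	// continue block C -> H, node -> M, and M also reachable from the loop's normal
	// exit. In the forward CFG (back edges stripped), node and C cannot reach each
	// other, node does not dominate M, and node's post-dominance frontier B can
	// still reach C, so node is classified as a break block.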
	auto *innermost_loop_header = get_innermost_loop_header_for(node);
	if (innermost_loop_header && innermost_loop_header->pred_back_edge)
	{
		bool dominates_merge = node->dominates(merge);
		bool can_reach_continue = query_reachability(*node, *innermost_loop_header->pred_back_edge);
		bool continue_can_reach = query_reachability(*innermost_loop_header->pred_back_edge, *node);
		bool pdf_can_reach_continue = true;

		for (auto *frontier : node->post_dominance_frontier)
		{
			bool header_dominates_frontier = innermost_loop_header->dominates(frontier);
			bool frontier_is_inside_loop_construct = query_reachability(*frontier, *innermost_loop_header->pred_back_edge);
			if (!header_dominates_frontier || !frontier_is_inside_loop_construct)
			{
				pdf_can_reach_continue = false;
				break;
			}
		}

		if (!dominates_merge && !continue_can_reach && !can_reach_continue && pdf_can_reach_continue)
			escaping_path = true;
	}

	return escaping_path;
}

bool CFGStructurizer::control_flow_is_escaping(const CFGNode *node, const CFGNode *merge) const
{
	if (node == merge)
		return false;

	if (control_flow_is_escaping_from_loop(node, merge))
		return true;

	// Try to test if our block is load bearing, in which case it cannot be considered a break block.
	// If the only path from idom to merge goes through node, it must be considered load bearing,
	// since removing break paths must not change reachability.
	if (block_is_load_bearing(node, merge))
		return false;

	// If we have two different switch blocks in our PDF frontier, something ridiculous is happening
	// where we effectively have one switch block falling through to another switch block (?!?!?!).
	// Definitely needs to be split up.
	unsigned switch_pdf_frontiers = 0;
	for (auto *frontier : node->post_dominance_frontier)
		if (frontier->ir.terminator.type == Terminator::Type::Switch)
			switch_pdf_frontiers++;
	if (switch_pdf_frontiers >= 2)
		return true;

	// If we cannot prove the escape through loop analysis, we might be able to deduce it from domination frontiers.
	// If control flow is not escaping, then there must exist a dominance frontier node A,
	// where merge strictly post-dominates A.
	// This means that control flow can merge somewhere before we hit the merge block, and we consider that
	// normal structured control flow.
	bool escaping_path = !node->reaches_domination_frontier_before_merge(merge);

	// This is a strong check.
	// If node directly branches to merge, but PDF does not,
	// we have detected a control flow pattern which is clearly a break.
	// The PDF candidate must dominate node for this check to be meaningful.
	if (escaping_path)
	{
		for (auto *frontier : node->post_dominance_frontier)
			if (frontier->dominates(node) && frontier->reaches_domination_frontier_before_merge(merge))
				return true;

		// Strong check as well.
		// If branching directly to continue block like this, this is a non-merging continue,
		// which we should always consider an escape.
		if (node->succ.size() == 1 && node->succ.front()->succ_back_edge)
			return true;
	}

	if (escaping_path && node->ir.operations.empty() && node->ir.phi.empty())
	{
		// If we post-dominate nothing useful or do nothing useful ourselves,
		// this is a good indication we're a common escape edge ladder block.
		// This can happen if we have a graph of:
		// A -> B
		// A -> C
		// B -> merge
		// C -> merge
		// B -> node
		// C -> node
		// node -> merge
		// This super jank diamond pattern will break the heuristics.
		// If we only post dominate work from one pred, we're not meaningfully merging anything,
		// so it should be safe to elide.
		if (node->count_post_dominates_work_from_incoming_preds() <= 1)
			return true;
	}

	if (escaping_path && node->pred.size() >= 2)
	{
		// We also need to consider false positives here, which are mostly only relevant for merge candidates.
		// One case would be selection construct A, which terminates in block B. B then branches to C.
		// Earlier in the A -> B construct, there might be a break block D which also branches to B.
		// This means that C will be a "false" domination frontier of B and our analysis above is wrong.
		// The algorithm here:
		// - Get idom of node, which represents the header. For this analysis, we're only interested
		//   in code paths which are dominated by idom.
		// - Find all preds of merge which are dominated by idom(node).
		// - Backtrace every pred P until they can reach B, or B can reach P.
		// - If B has strictly lowest post-visit order, we are not escaping. P was.
		auto *idom = node->immediate_dominator;
		bool found_false_positive = false;

		for (auto *pred : merge->pred)
		{
			// Don't care about these.
			if (!idom->dominates(pred))
				continue;

			while (pred != node && !query_reachability(*pred, *node) && !query_reachability(*node, *pred))
				pred = pred->immediate_dominator;

			// Ignore these.
			if (pred == node)
				continue;

			if (query_reachability(*pred, *node))
			{
				// Seems good. Keep going. If we don't find a counter example, we'll accept this as a false positive.
				found_false_positive = true;
			}
			else
			{
				// Indeed, this is an escape.
				found_false_positive = false;
				break;
			}
		}

		escaping_path = !found_false_positive;
	}

	return escaping_path;
}

bool CFGStructurizer::block_is_plain_continue(const CFGNode *node)
{
	return node->succ_back_edge != nullptr && node != node->succ_back_edge;
}

const CFGNode *CFGStructurizer::scan_plain_continue_block(const CFGNode *node)
{
	auto *base_node = node;
	while (!block_is_plain_continue(node) && base_node->dominates(node) &&
	       !node->succ_back_edge && !node->pred_back_edge &&
	       node->immediate_post_dominator && node->immediate_post_dominator != node)
	{
		node = node->immediate_post_dominator;
	}
	return node;
}

bool CFGStructurizer::selection_requires_structured_header(const CFGNode *node) const
{
	// From SPIR-V spec. SelectionMerge is required for:
	// ... an OpBranchConditional instruction that has different
	// True Label and False Label operands where neither are declared merge blocks or Continue Targets.

	// Ensure that there is a real merge block.
	// Only safe to do this in pass1, since we're not supposed to rewrite control flow there.
	// In first passes, it's okay to merge in the wrong direction.

	// Only consider normal selection merges. Switch and loop exits are stronger than selection exits,
	// so we don't need to apply special cases.
	// This consideration is purely to avoid excessive deltas in shader outputs, and having merge
	// blocks makes SPIRV-Cross output a little more readable.
	assert(node->succ.size() == 2 && !node->succ_back_edge);

	// We can use proper merge blocks if both paths converge to same location.
	// If we have a direct branch to continue block on one path,
	// we can use merge blocks in the opposing path just fine.
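	// E.g. (hypothetical): for `if (c) { A; } B;` the two succs are A and B, and A
	// can reach B, so the check below fires and a structured header is required.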
	for (int i = 0; i < 2; i++)
		if (query_reachability(*node->succ[i], *node->succ[1 - i]) || block_is_plain_continue(node->succ[i]))
			return true;

	for (int i = 0; i < 2; i++)
	{
		auto *s = node->succ[i];
		bool succ_is_plain_selection_merge =
		    std::find_if(s->headers.begin(), s->headers.end(), [&](const CFGNode *head) {
			    return head->ir.terminator.type != Terminator::Type::Switch &&
			           head->merge == MergeType::Selection &&
			           head->selection_merge_block == s;
		    }) != s->headers.end();

		if (succ_is_plain_selection_merge)
			return false;
	}

	return true;
}

void CFGStructurizer::fixup_broken_selection_merges(unsigned pass)
{
	// Here we deal with selection branches where one path breaks and one path merges.
	// This is a common case for ladder blocks where we need to merge to the "true" merge block.
	// The selection header has two succs, but the merge block might only have one pred block,
	// which means it was not considered a merge candidate earlier in find_selection_merges().
	for (auto *node : forward_post_visit_order)
	{
		if (node->succ.size() != 2)
			continue;
		if (node->merge != MergeType::None)
			continue;

		// A continue block will never need to merge execution, but it shouldn't have succ.size() == 2,
		// but rather succ.size() == 1 and a back edge.
		if (node->succ_back_edge)
			continue;

		bool dominates_a = node->dominates(node->succ[0]);
		bool dominates_b = node->dominates(node->succ[1]);

		// Continue blocks should also be considered to have a header already. Makes sure we don't merge to them.
		bool merge_a_has_header = !node->succ[0]->headers.empty() || block_is_plain_continue(node->succ[0]);
		bool merge_b_has_header = !node->succ[1]->headers.empty() || block_is_plain_continue(node->succ[1]);

		if (pass == 1 && !selection_requires_structured_header(node))
			continue;

		int trivial_merge_index = -1;

		// Only allow the obvious merge candidates in pass 1.
		// In pass 0, we might have a clear merge candidate,
		// but the other path might be an escaping edge, which needs to be considered.
		if (dominates_a && !dominates_b && !merge_a_has_header)
		{
			// A is an obvious candidate. B is a direct break/continue construct target most likely.
			merge_to_succ(node, 0);
			trivial_merge_index = 0;
		}
		else if (dominates_b && !dominates_a && !merge_b_has_header)
		{
			// B is an obvious candidate. A is a direct break/continue construct target most likely.
			merge_to_succ(node, 1);
			trivial_merge_index = 1;
		}
		else if (dominates_a && dominates_b && !merge_a_has_header && merge_b_has_header)
		{
			// Not as obvious of a candidate, but this can happen if one path hits a continue block,
			// and the other path hits a ladder merge block.
			// For a do/while(false) style loop, the loop body may dominate the merge block.
			merge_to_succ(node, 0);
			trivial_merge_index = 0;
		}
		else if (dominates_a && dominates_b && !merge_b_has_header && merge_a_has_header)
		{
			// Not as obvious of a candidate, but this can happen if one path hits a continue block,
			// and the other path hits a ladder merge block.
			// For a do/while style loop, the loop body may dominate the merge block.
			merge_to_succ(node, 1);
			trivial_merge_index = 1;
		}
		else if (dominates_a && dominates_b && !merge_a_has_header && !merge_b_has_header)
		{
			// We could merge to both, no obvious merge point.
			// Figure out where execution reconvenes.
			// If we have a "break"-like construct inside a selection construct, we will not end up dominating the merge block.
			// This will be fixed up with ladder constructs later in first pass.
			// In second pass, we will have redirected any branches which escape through a ladder block.
			// If we find that one path of the selection construct must go through that ladder block,
			// we know we have a break construct.
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge)
			{
				bool dominates_merge = node->dominates(merge);
				bool merges_to_continue = block_is_plain_continue(merge);

				// Here we have a likely case where one block is doing a clean "break" out of a loop, and
				// the other path continues as normal, and then conditionally breaks in a continue block or something similar.
				bool ambiguous_merge_case = !merges_to_continue && dominates_merge && !merge->headers.empty();

				// Happens first iteration. We'll have to split blocks, so register a merge target where we want it.
				// Otherwise, this is the easy case if we observe it in pass 1.
				// This shouldn't really happen though, as we'd normally resolve this earlier in find_selection_merges.
				bool mark_merge_block_case = !merges_to_continue && (merge->headers.empty() || pass == 0);

				// Another scenario is that we don't dominate the merge block in pass 1. We cannot split blocks now.
				// Check to see which paths can actually reach the merge target without going through a ladder block.
				// If we don't go through a ladder it means an outer scope will actually reach the merge node.
				// If we reach a ladder it means a block we dominate will make the escape.
				// If we're in pass 1 and we still don't dominate our merge target, consider it ambiguous.
				if (pass == 1 && !dominates_merge)
					ambiguous_merge_case = true;

				// Another case is when one path is "breaking" out to a continue block which we don't dominate.
				// We should not attempt to do ladder breaking here in pass 0 since it's unnecessary.
				bool tie_break_merge = ambiguous_merge_case || !mark_merge_block_case;

				bool a_path_is_break = control_flow_is_escaping(node->succ[0], merge);
				bool a_path_is_continue = block_is_plain_continue(scan_plain_continue_block(node->succ[0]));
				bool b_path_is_break = control_flow_is_escaping(node->succ[1], merge);
				bool b_path_is_continue = block_is_plain_continue(scan_plain_continue_block(node->succ[1]));
				bool a_path_is_break_or_continue = a_path_is_break || a_path_is_continue;
				bool b_path_is_break_or_continue = b_path_is_break || b_path_is_continue;

				// Continue is stronger than break. A breaking path may still need to merge control flow,
				// especially if that breaking path is very complicated. If we detect continue, the back-edge
				// post-dominates our succ, so we are guaranteed to never need to merge control flow on that path.
				// Demote the other path to a non-breaking path.
				if (a_path_is_continue != b_path_is_continue)
				{
					tie_break_merge = true;
					if (a_path_is_continue)
						b_path_is_break_or_continue = false;
					else
						a_path_is_break_or_continue = false;
				}

				if (tie_break_merge)
				{
					if (a_path_is_break_or_continue && b_path_is_break_or_continue)
					{
						// Both paths break, so we don't need to merge anything. Use Unreachable merge target.
						node->merge = MergeType::Selection;
						node->selection_merge_block = nullptr;
						//LOGI("Merging %s -> Unreachable\n", node->name.c_str());
					}
					else if (b_path_is_break_or_continue)
						merge_to_succ(node, 0);
					else if (a_path_is_break_or_continue)
						merge_to_succ(node, 1);
					else
					{
						// Need more interesting tie-breaking.
						// We can deduce which path is breaking or not based on the dominance frontier.
						// If a dominance frontier for A can reach B, then we assume that B is breaking further than A
						// is, so we should merge to A.
						// The breaking path for B will likely need to ensure that the selection header can
						// support such a break.
						// If we hit this path, the common post-dominator will not find the intended merge
						// target for B, so we never get to perform the necessary fixup.
						auto *a_front = node->succ[0]->dominance_frontier.size() == 1 ?
						                node->succ[0]->dominance_frontier.front() : nullptr;
						auto *b_front = node->succ[1]->dominance_frontier.size() == 1 ?
						                node->succ[1]->dominance_frontier.front() : nullptr;

						bool found_candidate = false;
						CFGNode *inner_merge_candidate = nullptr;

						// If there is no unique dominance frontier for one path, pick the one that has a unique frontier,
						// as that is considered a merge.
						if ((a_front || b_front) && a_front != b_front)
						{
							if (!b_front || (a_front && query_reachability(*a_front, *b_front)))
							{
								merge_to_succ(node, 0);
								inner_merge_candidate = b_front;
								found_candidate = true;
							}
							else if (!a_front || (b_front && query_reachability(*b_front, *a_front)))
							{
								merge_to_succ(node, 1);
								inner_merge_candidate = a_front;
								found_candidate = true;
							}
						}

						if (!found_candidate)
						{
							node->merge = MergeType::Selection;
							node->selection_merge_block = nullptr;

							if (a_front && b_front && a_front->headers.size() == 1 && b_front->headers.size() == 1)
							{
								// Extremely ambiguous merge where the selection construct can merge to two different paths.
								// Our only option at this point is to pick an arbitrary winner
								// and consider one path the breaking one arbitrarily.
								auto *a_header = a_front->headers.front();
								auto *b_header = b_front->headers.front();

								// Pick the largest enclosing header as a heuristic.
								inner_merge_candidate = a_header->forward_post_visit_order > b_header->forward_post_visit_order ?
								                        a_front : b_front;
							}
						}

						if (inner_merge_candidate && inner_merge_candidate->headers.size() == 1)
						{
							// The breaking path tries to break to this node.
							// This will only trigger in pass 1.
							auto *header = inner_merge_candidate->headers.front();
							if (header->merge == MergeType::Selection)
							{
								// Promote to loop header instead.
								// We might have to enter the loop ladder fixup stages later
								// to insert ladders as required.
								header->merge = MergeType::Loop;
								header->loop_merge_block = header->selection_merge_block;
								header->selection_merge_block = nullptr;
								header->freeze_structured_analysis = true;
							}
						}
					}
				}
				else
				{
					assert(merge);
					node->selection_merge_block = merge;
					node->merge = MergeType::Selection;
					merge->headers.push_back(node);
					//LOGI("Merging %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
				}
			}
			else
			{
				// We likely had one side of the branch take an "exit", in which case there is no common post-dominator.
				bool a_dominates_exit = node->succ[0]->dominates_all_reachable_exits();
				bool b_dominates_exit = node->succ[1]->dominates_all_reachable_exits();
				if (!a_dominates_exit && b_dominates_exit)
					merge_to_succ(node, 0);
				else if (!b_dominates_exit && a_dominates_exit)
					merge_to_succ(node, 1);
				else
				{
					// Both paths lead to exit. Do we even need to merge here?
					// In worst case we can always merge to an unreachable node in the CFG.
					node->merge = MergeType::Selection;
					node->selection_merge_block = nullptr;

					const auto node_is_degenerate_merge_block = [](const CFGNode *n) {
						return n->ir.terminator.type == Terminator::Type::Unreachable ||
						       (n->ir.terminator.type == Terminator::Type::Return && n->ir.operations.empty());
					};

					// In some cases however, we have to try even harder to tie-break these blocks,
					// since post-domination analysis may break due to early exit blocks.
					// Use principle of least break to tie-break.
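					// "Least break" (illustrative): prefer merging towards the succ whose
					// unique dominance frontier can reach the other's, i.e. stay in the
					// innermost construct; the succ counts and idoms below are fallbacks
					// when neither frontier reaches the other.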
					if (node->succ[0]->dominance_frontier.size() == 1 && node->succ[1]->dominance_frontier.size() == 1)
					{
						auto *a_frontier = node->succ[0]->dominance_frontier.front();
						auto *b_frontier = node->succ[1]->dominance_frontier.front();

						if (a_frontier != b_frontier)
						{
							// Try to merge in the direction of early returns, since the other direction
							// will likely result in a loop break or something like that.
							// Inner constructs tend to use weaker selection merges, which means we need
							// to merge in that direction to stay valid.
							if (query_reachability(*a_frontier, *b_frontier))
								merge_to_succ(node, 0);
							else if (query_reachability(*b_frontier, *a_frontier))
								merge_to_succ(node, 1);
							else
							{
								auto a_succ_count = a_frontier->succ.size();
								auto b_succ_count = b_frontier->succ.size();

								// First look at the idoms. This can give us an idea how the code is nested.
								// Merge towards innermost idom.
								// If that fails, merge against early returns as a last resort.
								a_frontier = a_frontier->immediate_dominator;
								b_frontier = b_frontier->immediate_dominator;

								if (a_frontier != b_frontier && query_reachability(*a_frontier, *b_frontier))
									merge_to_succ(node, 1);
								else if (a_frontier != b_frontier && query_reachability(*b_frontier, *a_frontier))
									merge_to_succ(node, 0);
								else if (a_succ_count == 0 && b_succ_count != 0)
									merge_to_succ(node, 0);
								else if (b_succ_count == 0 && a_succ_count != 0)
									merge_to_succ(node, 1);
							}
						}
					}
					else if (node_is_degenerate_merge_block(node->succ[1]) && !node_is_degenerate_merge_block(node->succ[0]))
					{
						// Try to merge away from blank returns.
						merge_to_succ(node, 0);
					}
					else if (node_is_degenerate_merge_block(node->succ[0]) && !node_is_degenerate_merge_block(node->succ[1]))
					{
						// Try to merge away from blank returns.
						merge_to_succ(node, 1);
					}
				}
			}
		}
		else if (pass == 0)
		{
			// No possible merge target. Just need to pick whatever node is the merge block here.
			// Only do this in first pass, so that we can get a proper ladder breaking mechanism in place if we are escaping.
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge)
			{
				// Don't try to merge to our switch block.
				auto *inner_header = node->get_outer_header_dominator();
				bool conditional_switch_break = inner_header &&
				                                inner_header->merge == MergeType::Selection &&
				                                inner_header->selection_merge_block == merge;

				if (!conditional_switch_break)
				{
					node->selection_merge_block = merge;
					node->merge = MergeType::Selection;
					merge->headers.push_back(node);
					//LOGI("Merging %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
				}
			}
			else
			{
				//LOGI("Cannot find a merge target for block %s ...\n", node->name.c_str());
			}
		}

		if (trivial_merge_index >= 0 && pass == 0)
		{
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge && !node->dominates(merge) && !block_is_plain_continue(merge))
			{
				if (!merge->headers.empty())
				{
					// We might have a trivial merge, yet the other branch direction
					// is a breaking construct. We will have to split some blocks.
					merge->headers.push_back(node);
				}

				auto *current_candidate = node->succ[trivial_merge_index];
				auto *other_candidate = node->succ[1 - trivial_merge_index];

				bool current_escapes = current_candidate == merge || control_flow_is_escaping(current_candidate, merge);

				// It's possible that our other candidate is a merge target. If we don't dominate the candidate,
				// it means it's on the dominance frontier and we should not consider it escaping.

				// Trivial heuristic for escape.
				bool other_escapes = other_candidate == merge || block_is_plain_continue(other_candidate);

				// Second level heuristic.
				if (!other_escapes && control_flow_is_escaping(other_candidate, merge))
				{
					// Final layer of hell.
					if (node->dominates(other_candidate))
					{
						// There is no frontier, so we accept escape analysis as-is.
						other_escapes = true;
					}
					else
					{
						// This is a frontier, so it shouldn't be considered an escape,
						// but if this is a "weak" frontier, we can avoid creating a dummy interim block.
						// If the other candidate is a loop merge, then we will resolve the merge in another way,
						// which will make the interim block superfluous.
						bool other_is_loop_merge_candidate =
						    other_candidate->headers.size() == 1 &&
						    other_candidate->headers.front()->merge == MergeType::Loop &&
						    (other_candidate->headers.front()->loop_merge_block == other_candidate ||
						     other_candidate->headers.front()->loop_ladder_block == other_candidate);
						other_escapes = other_is_loop_merge_candidate;
					}
				}

				if (!current_escapes && !other_escapes)
				{
					// Neither is considered an escape. This is strange and should not happen unless we have
					// a fake frontier block to contend with.
					// Attempt to tie-break by observing if the current candidate has a direct branch to merge,
					// but the other does not.
					if (std::find(current_candidate->succ.begin(), current_candidate->succ.end(), merge) !=
					        current_candidate->succ.end() &&
					    std::find(other_candidate->succ.begin(), other_candidate->succ.end(), merge) ==
					        other_candidate->succ.end())
					{
						current_escapes = true;

						// If the current candidate's frontier can reach the other candidate directly,
						// this is a final tie-break to show that we should accept the current situation.
						for (auto *frontier : current_candidate->dominance_frontier)
						{
							if (frontier != other_candidate && query_reachability(*frontier, *other_candidate))
							{
								current_escapes = false;
								break;
							}
						}
					}
				}

				// If we tried to merge in a direction which is a breaking construct,
				// this means that the other path is the actual desired break path.
				if (current_escapes && !other_escapes)
				{
					auto *target_block = node->succ[1 - trivial_merge_index];

					// We kinda want to merge the other way, but to do that, we need an interim block.
					auto *ladder = pool.create_node();
					ladder->name = node->name + "." + target_block->name + ".interim";
					ladder->add_branch(target_block);
					ladder->ir.terminator.type = Terminator::Type::Branch;
					ladder->ir.terminator.direct_block = target_block;
					ladder->immediate_dominator = node;
					ladder->immediate_post_dominator = target_block;
					ladder->dominance_frontier.push_back(target_block);
					ladder->forward_post_visit_order = node->forward_post_visit_order;
					ladder->backward_post_visit_order = node->backward_post_visit_order;
					node->retarget_branch(target_block, ladder);
					node->selection_merge_block = ladder;
				}
			}
		}
	}
}

void CFGStructurizer::rewrite_selection_breaks(CFGNode *header, CFGNode *ladder_to)
{
	// Don't rewrite loops here (since this is likely a loop merge block),
	// unless we're rewriting a header -> inner construct scenario.
	// Check if the ladder_to block has a path to the continue block.
	// If it does, it is part of the loop construct, and cannot be a loop merge block.
	if (header->pred_back_edge && !header->pred_back_edge->can_backtrace_to(ladder_to))
		return;

	// Don't rewrite switch blocks either.
	if (header->ir.terminator.type == Terminator::Type::Switch)
		return;

	//LOGI("Rewriting selection breaks %s -> %s\n", header->name.c_str(), ladder_to->name.c_str());

	UnorderedSet<CFGNode *> construct;

	// Be careful about rewriting branches in continuing constructs.
	CFGNode *inner_continue_block = nullptr;
	CFGNode *inner_continue_succ = nullptr;
	bool ladder_to_dominates_continue = false;
	bool break_post_dominates_ladder_to = false;

	auto *innermost_loop_header = get_innermost_loop_header_for(header);
	if (innermost_loop_header && innermost_loop_header->pred_back_edge)
		inner_continue_block = innermost_loop_header->pred_back_edge;

	if (inner_continue_block && inner_continue_block->succ.size() == 1)
	{
		inner_continue_succ = inner_continue_block->succ.front();
		break_post_dominates_ladder_to = inner_continue_succ->post_dominates(ladder_to);
		ladder_to_dominates_continue = ladder_to->dominates(inner_continue_block);
	}

	header->traverse_dominated_blocks([&](CFGNode *node) -> bool {
		// Inner loop headers are not candidates for a rewrite. They are split in split_merge_blocks.
		// Similar with switch blocks.
		// Also, we need to stop traversing when we hit the target block ladder_to.
		if (node != ladder_to)
		{
			if (!query_reachability(*node, *ladder_to))
				return false;

			bool branch_is_loop_or_switch = node->pred_back_edge || node->ir.terminator.type == Terminator::Type::Switch;

			// If our candidate scope splits a loop scope in half, ignore this candidate.
			if (break_post_dominates_ladder_to && !ladder_to_dominates_continue && node->dominates(inner_continue_block))
			{
				return false;
			}

			if (node->succ.size() >= 2 && !branch_is_loop_or_switch)
			{
				auto *outer_header = get_post_dominance_frontier_with_cfg_subset_that_reaches(node, ladder_to, nullptr);
				if (outer_header == header)
					construct.insert(node);
			}
			return true;
		}
		else
			return false;
	});

	Vector<CFGNode *> sorted_construct;
	sorted_construct.reserve(construct.size());
	for (auto *inner_block : construct)
		sorted_construct.push_back(inner_block);

	// Emit inner constructs before outer constructs.
	// This way we get natural nesting in case of certain if/else if ladders.
	std::sort(sorted_construct.begin(), sorted_construct.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order < b->forward_post_visit_order;
	});

	for (auto *inner_block : sorted_construct)
	{
		//LOGI("Header: %s, Inner: %s.\n", header->name.c_str(), inner_block->name.c_str());
		auto *ladder = pool.create_node();
		ladder->name = ladder_to->name + "." + inner_block->name + ".ladder";
		//LOGI("Walking dominated blocks of %s, rewrite branches %s -> %s.\n", inner_block->name.c_str(),
		//     ladder_to->name.c_str(), ladder->name.c_str());
		ladder->add_branch(ladder_to);
		ladder->ir.terminator.type = Terminator::Type::Branch;
		ladder->ir.terminator.direct_block = ladder_to;
		ladder->immediate_post_dominator = ladder_to;
		ladder->dominance_frontier.push_back(ladder_to);
		ladder->forward_post_visit_order = ladder_to->forward_post_visit_order;
		ladder->backward_post_visit_order = ladder_to->backward_post_visit_order;

		// Stop rewriting once we hit a merge block.
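		// Sketch of the rewrite (hypothetical names): branches inside inner_block's scope
		// that previously targeted ladder_to are redirected through the fresh block:
		//   before: inner_block -> ... -> ladder_to
		//   after:  inner_block -> ... -> ladder_to.inner_block.ladder -> ladder_to
		// giving each nesting level its own unique break target.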
		traverse_dominated_blocks_and_rewrite_branch(inner_block, ladder_to, ladder,
		                                             [inner_block](CFGNode *node) -> bool {
			                                             return inner_block->selection_merge_block != node;
		                                             }, {});
		ladder->recompute_immediate_dominator();
		rewrite_selection_breaks(inner_block, ladder);
	}
}

bool CFGStructurizer::is_strictly_dominance_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c)
{
	return a != b && a->dominates(b) && b != c && b->dominates(c);
}

bool CFGStructurizer::is_reachability_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c)
{
	return a != b && query_reachability(*a, *b) && b != c && query_reachability(*b, *c);
}

bool CFGStructurizer::header_and_merge_block_have_entry_exit_relationship(const CFGNode *header, const CFGNode *merge) const
{
	if (!merge->post_dominates(header))
		return false;

	// If there are other blocks which need merging, and that idom is the header,
	// then header is some kind of exit block.
	bool found_inner_merge_target = false;
	const CFGNode *potential_inner_merge_target = nullptr;

	const auto is_earlier = [](const CFGNode *candidate, const CFGNode *existing) {
		return !existing || (candidate->forward_post_visit_order > existing->forward_post_visit_order);
	};

	const auto is_later = [](const CFGNode *candidate, const CFGNode *existing) {
		return !existing || (candidate->forward_post_visit_order < existing->forward_post_visit_order);
	};

	header->traverse_dominated_blocks([&](const CFGNode *node) {
		if (node == merge)
			return false;

		// Don't analyze loops, this path is mostly for selections only.
		if (node->pred_back_edge)
			return false;

		if (node->num_forward_preds() <= 1)
			return true;

		auto *idom = node->immediate_dominator;
		if (idom == header)
		{
			found_inner_merge_target = true;
			return false;
		}
		else if (is_later(node, potential_inner_merge_target) &&
		         idom->immediate_post_dominator == merge &&
		         !exists_path_in_cfg_without_intermediate_node(header, node, idom))
		{
			// Need to analyze this further to determine if it's one of those insane crossing merge cases ...
			// Find the lowest post visit order if there are multiple candidates.
			potential_inner_merge_target = node;
		}

		return true;
	});

	if (found_inner_merge_target)
		return true;
	if (!potential_inner_merge_target)
		return false;

	// Alternatively, try to find a situation where the natural merge is difficult to determine.
	// In this scenario, selection constructs appear to be "breaking" in different directions.
	// Any attempt to split scopes here will fail spectacularly.
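	// Illustrative crossing pattern (hypothetical blocks): breaks towards the inner
	// target I and the outer merge M interleave in dominance order, e.g.
	//   b0 (-> I) dom b1 (-> M) dom b2 (-> I),
	// so neither break scope nests cleanly inside the other; the first/last bookkeeping
	// below detects exactly this ordering.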
	const CFGNode *first_natural_breaks_to_outer = nullptr;
	const CFGNode *first_natural_breaks_to_inner = nullptr;
	const CFGNode *last_natural_breaks_to_outer = nullptr;
	const CFGNode *last_natural_breaks_to_inner = nullptr;

	header->traverse_dominated_blocks([&](const CFGNode *node) {
		if (node == merge || node == potential_inner_merge_target)
			return false;
		if (!query_reachability(*node, *merge) || !query_reachability(*node, *potential_inner_merge_target))
			return false;
		if (node->succ.size() < 2)
			return true;

		bool breaks_to_outer = std::find_if(node->succ.begin(), node->succ.end(), [&](const CFGNode *candidate) {
			return merge->post_dominates(candidate);
		}) != node->succ.end();

		bool breaks_to_inner = std::find_if(node->succ.begin(), node->succ.end(), [&](const CFGNode *candidate) {
			return potential_inner_merge_target->post_dominates(candidate);
		}) != node->succ.end();

		if (breaks_to_inner)
			breaks_to_outer = false;

		if (breaks_to_outer)
		{
			if (is_earlier(node, first_natural_breaks_to_outer))
				first_natural_breaks_to_outer = node;
			if (is_later(node, last_natural_breaks_to_outer))
				last_natural_breaks_to_outer = node;
		}

		if (breaks_to_inner)
		{
			if (is_earlier(node, first_natural_breaks_to_inner))
				first_natural_breaks_to_inner = node;
			if (is_later(node, last_natural_breaks_to_inner))
				last_natural_breaks_to_inner = node;
		}

		return true;
	});

	if (!first_natural_breaks_to_outer || !first_natural_breaks_to_inner ||
	    !last_natural_breaks_to_outer || !last_natural_breaks_to_inner)
	{
		return false;
	}

	// Crossing break scenario.
	if (is_strictly_dominance_ordered(first_natural_breaks_to_inner, first_natural_breaks_to_outer, last_natural_breaks_to_inner))
		return true;
	else if (is_strictly_dominance_ordered(first_natural_breaks_to_outer, first_natural_breaks_to_inner, last_natural_breaks_to_outer))
		return true;
	else
		return false;
}

bool CFGStructurizer::serialize_interleaved_early_returns()
{
	for (auto *node : forward_post_visit_order)
	{
		if (node->num_forward_preds() <= 1)
			continue;

		// Never merge to a continue block.
		// We should never hit this path unless we explicitly
		// avoided creating a continue ladder block earlier.
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;
		auto *merge_candidate = CFGNode::find_common_post_dominator(idom, node);
		bool post_dominator_is_exit_node = merge_candidate && merge_candidate->immediate_post_dominator == merge_candidate;
		bool merged_into_terminating_path = post_dominator_is_exit_node && node->dominates_all_reachable_exits();

		// If our candidate idom post-dominates the entry block, we consider this the main path of execution.
		if (merged_into_terminating_path && idom->post_dominates(entry_block))
			merged_into_terminating_path = false;

		if (merged_into_terminating_path)
		{
			// Similar to loops, find the break target for this construct.
			auto *break_target = find_break_target_for_selection_construct(idom, node);

			if (break_target)
			{
				Vector<CFGNode *> valid = { break_target, node };
				collect_and_dispatch_control_flow(idom, break_target, valid, false, false);

				// This completely transposes the CFG, so need to recompute the CFG to keep going.
				recompute_cfg();
				return true;
			}
		}
	}

	return false;
}

bool CFGStructurizer::serialize_interleaved_merge_scopes_aggressive()
{
	for (auto *node : forward_post_visit_order)
	{
		// Eagerly collapse ridiculous unrolled loops if they exist.
		// We normally handle it, but we need to consider cases with cross-branches as well.
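		// E.g. (hypothetical): a loop with a conditional break that DXC fully unrolled
		// 32+ times leaves 32+ forward preds on the shared break target, and every
		// unrolled copy has that target on its dominance frontier, which is the shape
		// matched below.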
		constexpr size_t PredThreshold = 32;
		if (node->num_forward_preds() < PredThreshold)
			continue;
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;

		// Only consider simpler cases that we can collapse.
		if (!node->post_dominates(idom))
			continue;

		// node->forward_post_visit_order should map 1:1 to the post-visit array,
		// but in extreme circumstances where there have been inline CFG rewrites before recompute,
		// this may not be true, so be defensive.
		auto itr = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), node);
		auto end = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), idom);
		assert(itr != forward_post_visit_order.end());
		assert(end != forward_post_visit_order.end());

		Vector<CFGNode *> constructs;

		for (; itr != end; ++itr)
		{
			auto *candidate = *itr;
			if (candidate->num_forward_preds() < PredThreshold)
				continue;
			if (!idom->dominates(candidate))
				continue;

			auto &df = candidate->dominance_frontier;
			if (std::find(df.begin(), df.end(), node) != df.end())
				constructs.push_back(candidate);
		}

		if (constructs.empty())
			continue;

		if (constructs.size() >= 2)
			filter_serialization_candidates(constructs);
		if (constructs.empty())
			continue;

		constructs.push_back(node);
		collect_and_dispatch_control_flow(idom, node, constructs, false, true);
		recompute_cfg();
		return true;
	}

	return false;
}

Vector<std::pair<CFGNode *, CFGNode *>> CFGStructurizer::build_pdf_ranges(const Vector<CFGNode *> &candidates)
{
	Vector<std::pair<CFGNode *, CFGNode *>> pdf_ranges;
	pdf_ranges.reserve(candidates.size());

	// If breaking merge constructs are entangled, their PDFs will overlap.
	for (auto *candidate : candidates)
	{
		auto &pdf = candidate->post_dominance_frontier;
		assert(!pdf.empty());
		CFGNode *first = pdf.front();
		CFGNode *last = first;

		for (auto *n : pdf)
		{
			if (n->forward_post_visit_order > first->forward_post_visit_order)
				first = n;
			if (n->forward_post_visit_order < last->forward_post_visit_order)
				last = n;
		}

		pdf_ranges.push_back({ first, last });
	}

	return pdf_ranges;
}

bool CFGStructurizer::pdf_ranges_have_strict_dominance_ordering(const Vector<std::pair<CFGNode *, CFGNode *>> &pdf_ranges)
{
	bool need_deinterleave = false;
	auto count = pdf_ranges.size();
	for (size_t i = 0; i < count && !need_deinterleave; i++)
		for (size_t j = 0; j < count && !need_deinterleave; j++)
			if (i != j)
				need_deinterleave = is_strictly_dominance_ordered(pdf_ranges[i].first, pdf_ranges[j].first, pdf_ranges[i].second);
	return need_deinterleave;
}

void CFGStructurizer::filter_serialization_candidates(Vector<CFGNode *> &candidates) const
{
	// Ensure stable order.
	std::sort(candidates.begin(), candidates.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order < b->forward_post_visit_order;
	});

	auto *common_idom = candidates[0];
	for (size_t i = 1, n = candidates.size(); i < n; i++)
		common_idom = CFGNode::find_common_dominator(common_idom, candidates[i]);

	// Filter out false positive inner constructs.
	// If we're dominated by another inner construct, and we don't post-dominate that construct, we should yield.
	for (auto itr = candidates.begin(); itr != candidates.end(); )
	{
		bool eliminated = false;
		for (auto candidate_itr = itr + 1; candidate_itr != candidates.end(); ++candidate_itr)
		{
			bool keep_candidate = (*candidate_itr) == common_idom ||
			                      !(*candidate_itr)->dominates(*itr) ||
			                      (*itr)->post_dominates(*candidate_itr);

			// Don't let the common idom of constructs consume subsequent constructs.
			if (!keep_candidate)
			{
				// To accept a dominator, we don't want any common idom removing every node.
				std::move(itr + 1, candidates.end(), itr);
				candidates.pop_back();
				eliminated = true;
				break;
			}
		}

		if (!eliminated)
			++itr;
	}

	Vector<CFGNode *> valid_constructs;

	// Prune any candidate that can reach another candidate. The sort ensures that the candidate to be removed comes last.
	size_t count = candidates.size();
	for (size_t i = 0; i < count; i++)
	{
		bool valid = true;
		for (size_t j = 0; j < i; j++)
		{
			if (query_reachability(*candidates[i], *candidates[j]))
			{
				valid = false;
				break;
			}
		}

		// Another sanity check for candidates, the idom must be able to reach the other nodes.
		if (valid)
		{
			valid = false;
			for (size_t j = 0; j < count; j++)
			{
				if (i == j)
					continue;

				if (query_reachability(*candidates[i]->immediate_dominator, *candidates[j]))
				{
					valid = true;
					break;
				}
			}
		}

		if (valid)
			valid_constructs.push_back(candidates[i]);
	}

	candidates = std::move(valid_constructs);
}

bool CFGStructurizer::serialize_interleaved_merge_scopes()
{
	// Try to fix up scenarios which arise from unrolled loops with multiple break blocks.
	// DXC will emit maximal convergence and force all dynamic instances of a given break to branch to the same
	// block, which then breaks, e.g.:
	// for (int i = 0; i < CONSTANT; i++) { cond_break_construct1(); cond_break_construct2(); cond_break_construct3(); }
	// When this unrolls we can end up with merge blocks which are entangled. The only sane way to make this work
	// is to serialize the breaks to after the merge block.
	UnorderedSet<CFGNode *> potential_merge_nodes;
	for (auto *node : forward_post_visit_order)
		if (node->num_forward_preds() >= 2 && !block_is_plain_continue(node))
			potential_merge_nodes.insert(node);

	UnorderedSet<CFGNode *> visited;

	for (auto *node : forward_post_visit_order)
	{
		if (node->num_forward_preds() <= 1)
			continue;
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;
		Vector<CFGNode *> complex_inner_constructs;
		Vector<CFGNode *> constructs;

		// Find merge block candidates that are strictly dominated by idom and immediately post-dominated by node.
		// They also must not be good merge candidates on their own.
		// Also, we're not interested in any loop merge candidates.
		for (auto *candidate : potential_merge_nodes)
		{
			if (candidate != idom && idom->dominates(candidate) && node->post_dominates(candidate) &&
			    !candidate->post_dominates_perfect_structured_construct() &&
			    get_innermost_loop_header_for(idom, candidate) == idom)
			{
				bool direct_dominance_frontier = candidate->dominance_frontier.size() == 1 &&
				                                 candidate->dominance_frontier.front() == node;

				// The candidate must not try to merge to other code since we might end up introducing loops that way.
				// All code reachable by candidate must cleanly break to node.
				// We can make use of a simpler rewrite path if all code paths to node go through our candidates.

				// Accept a construct and determine if we need to promote the complex constructs instead of the inner constructs.
				// The inner construct may just be a false positive that ends up blocking the rewrite.
				if (direct_dominance_frontier)
					constructs.push_back(candidate);
				else
					complex_inner_constructs.push_back(candidate);
			}
		}

		// If true, we need a complex rewrite. This means taking unrelated branches to node and fusing them into
		// one big merge. This requires very simple control flow from the candidates,
		// since otherwise we end up with unintended loops in the rewrite.
		// The simplified flow requires that all code paths from idom flow through the complex inner candidates.
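		// For intuition (hypothetical): complex candidates {X, Y} must each reach the
		// single direct candidate, the direct candidate must not post-dominate either
		// of them, and every path from their common idom down to node must be blocked
		// by {X, Y}; only then is promoting the complex constructs accepted below.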
		bool collect_all_paths_to_pdom = true;

		if (constructs.size() == 1 && complex_inner_constructs.size() >= 2)
		{
			auto *candidate_inner = constructs.front();
			auto *common_idom = candidate_inner;
			constructs.clear();

			// Try to detect a false positive where we should ignore inner_constructs.
			// Ensure that the inner construct comes after the candidate constructs.
			bool should_promote_complex = true;
			for (auto *candidate : complex_inner_constructs)
			{
				if (!query_reachability(*candidate, *candidate_inner))
				{
					should_promote_complex = false;
					break;
				}
			}

			if (should_promote_complex)
			{
				// The inner candidate should not post-dominate any other candidate block.
				// We're looking for unusual merge patterns here.
				for (auto *pred : complex_inner_constructs)
				{
					if (candidate_inner->post_dominates(pred))
					{
						should_promote_complex = false;
						break;
					}
				}
			}

			if (should_promote_complex)
			{
				// In complex merges, we focus on merging as early as possible, rather than as late as possible.
				// Remove any candidates which are reachable by other candidates.
				// Disregard the inner constructs, promote the complex ones.
				collect_all_paths_to_pdom = false;

				// Ensure stable order.
				std::sort(complex_inner_constructs.begin(), complex_inner_constructs.end(),
				          [](const CFGNode *a, const CFGNode *b) {
					          return a->forward_post_visit_order > b->forward_post_visit_order;
				          });

				size_t count = complex_inner_constructs.size();
				for (size_t j = 0; j < count; j++)
				{
					bool is_reachable = false;
					for (size_t i = 0; i < j && !is_reachable; i++)
						if (query_reachability(*complex_inner_constructs[i], *complex_inner_constructs[j]))
							is_reachable = true;

					if (!is_reachable)
						constructs.push_back(complex_inner_constructs[j]);
				}
			}

			if (should_promote_complex && constructs.size() >= 2)
			{
				for (auto *inner : constructs)
					common_idom = CFGNode::find_common_dominator(common_idom, inner);

				// Verify that all paths to node must go through the inner constructs.
				// We cannot handle more awkward merges.
				should_promote_complex = !node->can_backtrace_to_with_blockers(common_idom, constructs);
			}

			if (!should_promote_complex)
				continue;
		}

		if (constructs.size() < 2)
			continue;
		filter_serialization_candidates(constructs);
		if (constructs.size() < 2)
			continue;

		auto pdf_ranges = build_pdf_ranges(constructs);
		bool need_deinterleave = pdf_ranges_have_strict_dominance_ordering(pdf_ranges);
		size_t count = constructs.size();
		CFGNode *common_anchor = nullptr;

		if (!need_deinterleave)
		{
			// Detect a complicated pattern that comes up which looks a lot like interleaved merges, but isn't really.
			// A       B
			// |\     /|
			// | \   / |
			// |   E   |
			// | /   \ |
			// C       D
			//  \     /
			//   \   /
			//     F
			// Candidates: {C, D}
			// Where {A, E} is pdf range of C
			// and {B, E} is pdf range of D.
			// The last PDF can be considered a merge anchor that distributes code further.
			// E must have {C, D} - and only those - in the dominance frontier.
			common_anchor = pdf_ranges[0].second;
			bool can_be_anchor = common_anchor->pred.size() >= 2 ||
			                     (common_anchor->pred.size() == 1 && common_anchor->pred.front()->succ_back_edge);

			need_deinterleave = common_anchor->dominance_frontier.size() == count &&
			                    common_anchor->succ.size() == count &&
			                    common_anchor->ir.terminator.type == Terminator::Type::Condition &&
			                    can_be_anchor;

			for (size_t i = 0; i < count && need_deinterleave; i++)
			{
				need_deinterleave = query_reachability(*pdf_ranges[i].first, *pdf_ranges[i].second) &&
				                    pdf_ranges[0].second == pdf_ranges[i].second;
				need_deinterleave = need_deinterleave &&
				                    std::find(common_anchor->dominance_frontier.begin(),
				                              common_anchor->dominance_frontier.end(),
				                              constructs[i]) != common_anchor->dominance_frontier.end();
			}

			if (!need_deinterleave)
				common_anchor = nullptr;
		}

		if (!need_deinterleave)
		{
			const CFGNode *interleaved_exit_loop = nullptr;

			// Try finding interleaved loop exits. An extremely rare and awkward scenario.
			// This pattern makes it so that loop resolves cannot work well since nothing ends up being nested.
			// We can deal with one, but if two or more loops end up with awkward resolves, we have to employ magic.

			// First, look at the PDFs, try to find a node in an inner loop.
			// If the loops exit in a way where they can both reach the interleaving candidates,
			// that's a scenario where we need to consider rewriting.
			for (auto *candidate : constructs)
			{
				auto &pdf = candidate->post_dominance_frontier;
				for (auto *pdf_candidate : pdf)
				{
					auto *inner_header = get_innermost_loop_header_for(idom, pdf_candidate);
					if (inner_header != idom && inner_header != interleaved_exit_loop)
					{
						// Don't allow nested loops to be considered as two loops.
						if (interleaved_exit_loop && query_reachability(*inner_header, *interleaved_exit_loop))
							continue;

						if (query_reachability(*pdf_candidate, *inner_header->pred_back_edge))
						{
							// The back-edge can only reach one of the interleave nodes, while the candidate PDF
							// can reach both. This proves weird break cases.
							unsigned back_edge_reach_count = 0;
							unsigned pdf_reach_count = 0;

							for (auto *reach_candidate : constructs)
							{
								if (query_reachability(*inner_header->pred_back_edge, *reach_candidate))
									back_edge_reach_count++;
								if (query_reachability(*pdf_candidate, *reach_candidate))
									pdf_reach_count++;
							}

							if (back_edge_reach_count == 1 && pdf_reach_count == constructs.size())
							{
								// We've found two candidates now, break out.
								need_deinterleave = interleaved_exit_loop != nullptr;
								interleaved_exit_loop = inner_header;
								break;
							}
						}
					}
				}

				if (need_deinterleave)
					break;
			}
		}

		if (!need_deinterleave && count >= 3)
		{
			// More special cases.
			// We might not find an interleaving scenario by looking at strict dominance,
			// but there might be difficult cases lurking if we look at pure reachability.
			for (size_t i = 0; i < count && !need_deinterleave; i++)
			{
				for (size_t j = 0; j < count && !need_deinterleave; j++)
				{
					if (i == j)
						continue;
					if (!is_reachability_ordered(pdf_ranges[i].first, pdf_ranges[j].first, pdf_ranges[i].second))
						continue;

					auto &df = pdf_ranges[i].second->dominance_frontier;
					bool all_in_frontier = true;

					// If all the valid constructs are in the dominance frontier, consider this a highly difficult case.
					// If there's just two candidate blocks we can resolve them with ladder breaks, but three and above
					// can be nested in unexpected ways. This threshold is mostly a heuristic to avoid
					// doing complex transforms unless we really know for sure we need them.
                    for (size_t k = 0; k < count && all_in_frontier; k++)
                        all_in_frontier = std::find(df.begin(), df.end(), constructs[k]) != df.end();
                    need_deinterleave = all_in_frontier;
                }
            }
        }

        if (!need_deinterleave && pdf_ranges[0].first != pdf_ranges[0].second)
        {
            // Special case of the above. If the PDFs overlap exactly we have criss-cross merge patterns.
            // Be very conservative when we accept this since this pattern also comes up as innocent
            // breaking patterns. A complicating factor is when the idom is a loop header and
            // we don't post-dominate the idom, which more likely indicates a breaking path.
            bool same_pdfs = true;
            for (size_t i = 1; i < count && same_pdfs; i++)
                same_pdfs = pdf_ranges[i].first == pdf_ranges[0].first && pdf_ranges[i].second == pdf_ranges[0].second;

            // Heuristic to avoid doing needless rewrites.
            // The issues only seem to manifest in this situation.
            // Likely the problem is that different idoms with wrong ladder resolve order
            // can lead to backwards branches in some extremely rare cases ...
            auto *first = pdf_ranges[0].first->immediate_dominator;
            auto *second = pdf_ranges[0].second->immediate_dominator;
            auto *common_idom = CFGNode::find_common_dominator(first, second);
            bool crossing_idoms = first != second && first != common_idom && second != common_idom &&
                                  (query_reachability(*first, *second) || query_reachability(*second, *first));

            if (same_pdfs && crossing_idoms)
            {
                // All PDFs must have all candidates in their DFs.
                bool all_in_frontier = true;

                // If all the valid constructs are in the dominance frontier, consider this a highly difficult case.
                // If there are just two candidate blocks we can resolve them with ladder breaks, but three and above
                // can be nested in unexpected ways. This threshold is mostly a heuristic to avoid
                // doing complex transforms unless we really know for sure we need them.
                const Vector<CFGNode *> *dfs[] = {
                    &pdf_ranges[0].first->dominance_frontier,
                    &pdf_ranges[0].second->dominance_frontier,
                };

                for (auto *df : dfs)
                    for (size_t i = 0; i < count && all_in_frontier; i++)
                        all_in_frontier = std::find(df->begin(), df->end(), constructs[i]) != df->end();

                need_deinterleave = all_in_frontier;
            }
        }

        if (need_deinterleave)
        {
            if (common_anchor)
                collect_and_dispatch_control_flow_from_anchor(common_anchor, constructs);
            else
                collect_and_dispatch_control_flow(idom, node, constructs, collect_all_paths_to_pdom, false);

            // This completely transposes the CFG, so need to recompute CFG to keep going.
            recompute_cfg();
            return true;
        }
    }

    return false;
}

void CFGStructurizer::split_merge_scopes()
{
    for (auto *node : forward_post_visit_order)
    {
        // Setup a preliminary merge scope so we know when to stop traversal.
        // We don't care about traversing inner scopes, nor starting from the merge block itself.
        if (node->num_forward_preds() <= 1)
            continue;
        if (block_is_plain_continue(node))
            continue;

        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        if (idom->merge == MergeType::None)
        {
            idom->merge = MergeType::Selection;
            idom->selection_merge_block = node;
        }

        node->headers.push_back(idom);
    }

    for (auto *node : forward_post_visit_order)
    {
        if (node->num_forward_preds() <= 1)
            continue;

        // Continue blocks can always be branched to, from any scope, so don't rewrite anything here.
        if (node->succ_back_edge)
            continue;

        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        // We already rewrote this selection construct in serialize_interleaved_merge_scopes.
        // Don't try to introduce unnecessary ladders.
        if (idom->merge == MergeType::Loop && idom->loop_merge_block == node)
            continue;

        // If we find a construct which is a typical entry <-> exit scenario, do not attempt to rewrite
        // any branches. The real merge block might be contained inside this construct, and this block merely
        // serves as the exit merge point. It should generally turn into a loop merge later.
        if (header_and_merge_block_have_entry_exit_relationship(idom, node))
            continue;

        // Now we want to deal with cases where we are using this selection merge block as a "goto" target
        // for inner selection constructs. Using a loop header might be possible,
        // but we will need to split up blocks to make sure that we don't end up with headers where the only branches
        // are either merges or breaks.
        // This case is relevant when we have something like:
        // A -> B -> C -> D -> M
        // A -> M
        // B -> M
        // C -> M
        // D -> M
        // We'll need intermediate blocks which merge each layer of the selection "onion".
        rewrite_selection_breaks(idom, node);
    }
}

bool CFGStructurizer::query_reachability(const CFGNode &from, const CFGNode &to) const
{
    if (&from == &to)
        return true;

    const uint32_t *src_reachability = &reachability_bitset[from.forward_post_visit_order * reachability_stride];
    return (src_reachability[to.forward_post_visit_order / 32] & (1u << (to.forward_post_visit_order & 31u))) != 0;
}

void CFGStructurizer::visit_reachability(const CFGNode &node)
{
    uint32_t *dst_reachability = &reachability_bitset[node.forward_post_visit_order * reachability_stride];

    for (auto *succ : node.succ)
    {
        // Inherit reachability from all successors.
        const uint32_t *src_reachability = &reachability_bitset[succ->forward_post_visit_order * reachability_stride];
        for (unsigned i = 0; i < reachability_stride; i++)
            dst_reachability[i] |= src_reachability[i];
    }

    // We can reach ourselves.
    dst_reachability[node.forward_post_visit_order / 32] |= 1u << (node.forward_post_visit_order & 31u);
}

void CFGStructurizer::build_reachability()
{
    reachability_stride = (forward_post_visit_order.size() + 31) / 32;
    reachability_bitset.clear();
    reachability_bitset.resize(reachability_stride * forward_post_visit_order.size());
    for (auto *node : forward_post_visit_order)
        visit_reachability(*node);
}

void CFGStructurizer::recompute_cfg()
{
    reset_traversal();
    visit(*entry_block);

    // Need to prune dead preds before computing dominance.
    prune_dead_preds();
    build_immediate_dominators();
    build_reachability();

    backwards_visit();
    build_immediate_post_dominators();

    compute_dominance_frontier();
    compute_post_dominance_frontier();
}

CFGNode *CFGStructurizer::find_natural_switch_merge_block(CFGNode *node, CFGNode *post_dominator) const
{
    // Maintain the original switch block order if possible to avoid awkward churn in reference output.
    uint64_t order = 1;
    for (auto &c : node->ir.terminator.cases)
    {
        // We'll need to decrement global order up to N times in the worst case.
        // Use 64-bit here as a safeguard in case the module is using a ridiculous number of case labels.
        c.global_order = order * node->ir.terminator.cases.size();
        order++;
    }

    // First, sort so that any fallthrough parent comes before fallthrough target.
    std::sort(node->ir.terminator.cases.begin(), node->ir.terminator.cases.end(),
              [](const Terminator::Case &a, const Terminator::Case &b) {
                  return a.node->forward_post_visit_order > b.node->forward_post_visit_order;
              });

    // Look at all potential fallthrough candidates and reassign global order.
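    // A fallthrough parent adopts (child order - 1), so the stable sort below places it
    // immediately before its fallthrough target while leaving unrelated cases untouched.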
    for (size_t i = 0, n = node->ir.terminator.cases.size(); i < n; i++)
    {
        for (size_t j = i + 1; j < n; j++)
        {
            auto &parent = node->ir.terminator.cases[i];
            auto &child = node->ir.terminator.cases[j];

            // A case label might be the merge block candidate of the switch.
            // Don't consider case fallthrough if the child post-dominates the entire switch statement.
            // If a case label is a continue block, ignore it, since it will be a pure continue break in this scenario.
            // This is not considered a fallthrough, just a common break.
            if (child.node != post_dominator && parent.node != child.node &&
                !(child.node->succ_back_edge || child.node->is_pseudo_back_edge) &&
                query_reachability(*parent.node, *child.node))
            {
                parent.global_order = child.global_order - 1;
                break;
            }
        }
    }

    // Sort again, but this time, by global order.
    std::stable_sort(node->ir.terminator.cases.begin(), node->ir.terminator.cases.end(),
                     [](const Terminator::Case &a, const Terminator::Case &b) {
                         return a.global_order < b.global_order;
                     });

    // Detect impossible fallthrough scenarios. We can have A -> B -> C fallthrough, but not
    // A -> C and B -> C. In this situation, we should see C as the actual switch merge block,
    // and rewrite the switch to loop + switch.
    // Detect this by having two entries with identical global order.
    bool has_impossible_fallthrough = false;
    uint64_t target_order = 0;
    for (size_t i = 1, n = node->ir.terminator.cases.size(); i < n; i++)
    {
        if (node->ir.terminator.cases[i].global_order == node->ir.terminator.cases[i - 1].global_order)
        {
            target_order = node->ir.terminator.cases[i].global_order + 1;
            has_impossible_fallthrough = true;
            break;
        }
    }

    CFGNode *candidate = nullptr;

    if (has_impossible_fallthrough)
    {
        for (auto &c : node->ir.terminator.cases)
        {
            if (c.global_order == target_order)
            {
                // Pick the earliest one.
                candidate = c.node;
                break;
            }
        }
    }

    bool case_labels_can_be_candidate_frontier = false;
    if (has_impossible_fallthrough && !candidate)
    {
        // This can happen if the impossible candidate block is a pred of yet another case label ?!?!
        // If this happens, do the full analysis in the loop below.
        case_labels_can_be_candidate_frontier = true;
    }

    // We found a candidate, but there might be multiple candidates which are considered impossible.
    // If two case labels merge execution before the candidate merge, we should consider that the natural merge,
    // since it is not possible to express this without a switch merge.
    for (auto &c : node->ir.terminator.cases)
    {
        for (auto *front : c.node->dominance_frontier)
        {
            // Never consider continue constructs here.
            if (front->succ_back_edge || front->is_pseudo_back_edge)
                continue;

            if (!case_labels_can_be_candidate_frontier)
            {
                // Ignore frontiers that are other case labels.
                // We allow simple fallthrough, and if we found an impossible case we would have handled it already.
                for (auto &ic : node->ir.terminator.cases)
                {
                    if (ic.node == front)
                    {
                        front = nullptr;
                        break;
                    }
                }
            }

            if (!front)
                continue;

            if (!post_dominator || (front->forward_post_visit_order != post_dominator->forward_post_visit_order &&
                                    query_reachability(*front, *post_dominator)))
            {
                // If this is reachable by a different case label, we have a winner. This must be a fake fallthrough
                // that we should promote to switch merge.
                for (auto &ic : node->ir.terminator.cases)
                {
                    if (ic.node != c.node && query_reachability(*ic.node, *front))
                    {
                        // Select the innermost block that is impossible.
                        // Breaking further out can be handled with loops, etc.
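                        // A larger forward post-visit order means the block is visited earlier in
                        // the forward traversal, i.e. the tighter, more inner merge scope.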
                        if (!candidate || front->forward_post_visit_order > candidate->forward_post_visit_order)
                            candidate = front;
                    }
                }
            }
        }
    }

    return candidate ? candidate : post_dominator;
}

CFGNode *CFGStructurizer::create_switch_merge_ladder(CFGNode *header, CFGNode *merge)
{
    // We did not rewrite switch blocks w.r.t. selection breaks.
    // We might be in a situation where the switch block is trying to merge to a block which is already being merged to.
    // Create a ladder which the switch block could merge to.
    return create_ladder_block(header, merge, ".switch-merge");
}

Operation *CFGStructurizer::build_switch_case_equal_check(const CFGNode *header, CFGNode *insert_node,
                                                          const Terminator::Case &c)
{
    Operation *ieq;

    if (c.is_default)
    {
        // Awkward since we have to compare all other case labels.
        Operation *neq_and = nullptr;
        for (auto &label : header->ir.terminator.cases)
        {
            if (!label.is_default)
            {
                Operation *neq = module.allocate_op(spv::OpINotEqual, module.allocate_id(),
                                                    module.get_builder().makeBoolType());
                neq->add_id(header->ir.terminator.conditional_id);
                neq->add_id(module.get_builder().makeUintConstant(label.value));
                insert_node->ir.operations.push_back(neq);

                if (neq_and)
                {
                    Operation *and_op = module.allocate_op(spv::OpLogicalAnd, module.allocate_id(),
                                                           module.get_builder().makeBoolType());
                    and_op->add_id(neq_and->id);
                    and_op->add_id(neq->id);
                    insert_node->ir.operations.push_back(and_op);
                    neq_and = and_op;
                }
                else
                {
                    neq_and = neq;
                }
            }
        }
        ieq = neq_and;
    }
    else
    {
        ieq = module.allocate_op(spv::OpIEqual, module.allocate_id(), module.get_builder().makeBoolType());
        ieq->add_id(header->ir.terminator.conditional_id);
        ieq->add_id(module.get_builder().makeUintConstant(c.value));
        insert_node->ir.operations.push_back(ieq);
    }

    return ieq;
}

void CFGStructurizer::hoist_switch_branches_to_frontier(CFGNode *node, CFGNode *merge,
                                                        CFGNode *dominance_frontier_candidate)
{
    // Dispatch to the dominance frontier before we enter switch scope.
    auto *pred = create_helper_pred_block(node);
    std::swap(pred->ir.operations, node->ir.operations);

    auto succs = node->succ;
    for (auto *succ : succs)
    {
        if (!query_reachability(*succ, *dominance_frontier_candidate))
            continue;

        // Rewrite the case label to reach merge block in a unique path.
        // That way we can PHI select whether to branch to dominance frontier or not
        // in the switch merge block.
        spv::Id cond_id = 0;
        for (auto &c : node->ir.terminator.cases)
        {
            if (c.node == succ)
            {
                auto *ieq = build_switch_case_equal_check(node, pred, c);
                if (cond_id)
                {
                    auto *bor = module.allocate_op(spv::OpLogicalOr, module.allocate_id(),
                                                   module.get_builder().makeBoolType());
                    bor->add_id(cond_id);
                    bor->add_id(ieq->id);
                    pred->ir.operations.push_back(bor);
                    cond_id = bor->id;
                }
                else
                {
                    cond_id = ieq->id;
                }
            }
        }

        if (succ == dominance_frontier_candidate)
        {
            // We're directly branching to target, so might have to rewrite PHI incoming
            // block to pred helper block instead.
            for (auto &phi : dominance_frontier_candidate->ir.phi)
                for (auto &incoming : phi.incoming)
                    if (incoming.block == node)
                        incoming.block = pred;
        }

        for (auto *&p : succ->pred)
            if (p == node)
                p = pred;

        for (auto &c : node->ir.terminator.cases)
            if (c.node == succ)
                c.node = merge;

        node->succ.erase(std::find(node->succ.begin(), node->succ.end(), succ));
        node->add_branch(merge);
        pred->add_branch(succ);

        // Make sure that our selection branch has somewhere to merge if it has to.
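        // Routing a direct branch to the frontier through an intermediate node below gives the
        // new selection construct a dedicated block to branch to instead of the frontier itself.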
        if (succ == dominance_frontier_candidate)
        {
            succ = pred->rewrite_branch_through_intermediate_node(dominance_frontier_candidate,
                                                                  dominance_frontier_candidate);
        }

        pred->ir.terminator.type = Terminator::Type::Condition;
        pred->ir.terminator.conditional_id = cond_id;
        pred->ir.terminator.true_block = succ;
        pred->ir.terminator.false_block = node;
        pred->ir.terminator.direct_block = nullptr;

        // Have to assume that there is only one path to this frontier,
        // otherwise we're in a world of impossible case merges
        // which should have been handled elsewhere ...
        return;
    }
}

CFGStructurizer::SwitchProgressMode CFGStructurizer::process_switch_blocks(unsigned pass)
{
    bool modified_cfg = false;

    for (auto index = forward_post_visit_order.size(); index; index--)
    {
        auto *node = forward_post_visit_order[index - 1];
        if (node->ir.terminator.type != Terminator::Type::Switch)
            continue;

        auto *merge = find_common_post_dominator(node->succ);
        auto *natural_merge = find_natural_switch_merge_block(node, merge);

        // If there are early exits inside the switch statement, post-dominance analysis won't work.
        // Just pick the natural merge.
        // This only seems to happen in dxbc2dxil.
        if (!merge)
            merge = natural_merge;

        // If there is still nothing, it's possible one of the case labels is the only non-exiting path.
        // If we have no natural merge either, this is the likely merge point.
        if (!merge)
        {
            CFGNode *pdom = nullptr;
            for (auto *succ : node->succ)
            {
                if (!succ->dominates_all_reachable_exits())
                {
                    if (!pdom)
                    {
                        pdom = succ;
                    }
                    else
                    {
                        auto *new_pdom = CFGNode::find_common_post_dominator(pdom, succ);
                        if (new_pdom)
                            pdom = new_pdom;
                    }

                    // If there is at least one exit, have a fallback.
                    merge = succ;
                    natural_merge = succ;
                }
            }

            // If we have a valid pdom, that is the more reasonable target.
            if (pdom)
            {
                merge = pdom;
                natural_merge = pdom;
            }
        }

        if (!merge)
        {
            // Merge to unreachable.
            node->merge = MergeType::Selection;
            continue;
        }

        if (node->freeze_structured_analysis && node->merge == MergeType::Selection)
        {
            natural_merge = node->selection_merge_block;
        }
        else if (pass == 0)
        {
            // It is possible that we don't necessarily want to merge to the post-dominator.
            // There might be inner constructs which are better suited.
            // This can happen if some branches break farther out than some other branches.
            // We should let the loop ladder system take care of that.
            // The switch merge should consume the smallest possible scope.
            if (merge != natural_merge)
            {
                CFGNode *inner_merge = merge;
                for (auto *frontier_node : natural_merge->dominance_frontier)
                {
                    if (node->dominates(frontier_node) && merge->post_dominates(frontier_node) &&
                        frontier_node->forward_post_visit_order > inner_merge->forward_post_visit_order)
                    {
                        inner_merge = frontier_node;
                    }
                }

                if (merge != inner_merge && inner_merge != natural_merge && node->dominates(merge))
                {
                    // If node dominates the merge, it's important that node remains a header block.
                    // If we have an inner merge, we need to transpose the control flow so that
                    // we avoid the inner merge altogether.
                    Vector<CFGNode *> constructs = { natural_merge };
                    for (auto *pred : inner_merge->pred)
                        if (!query_reachability(*pred, *natural_merge) && !query_reachability(*natural_merge, *pred))
                            constructs.push_back(pred);

                    if (constructs.size() >= 2)
                    {
                        collect_and_dispatch_control_flow(node, merge, constructs, false, false);
                        return SwitchProgressMode::IterativeModify;
                    }
                }

                merge = inner_merge;

                // Relying on loop ladder system might not be possible in all situations.
                // It's possible that the switch block is also a loop header for example.
                // Need to transpose the code with a ladder to avoid impossible problems later.
                if (node->pred_back_edge)
                    natural_merge = transpose_code_path_through_ladder_block(node, natural_merge, inner_merge);
            }
            else if (merge && !node->dominates(merge))
            {
                CFGNode *dominance_frontier_candidate = nullptr;

                // If we have a normal merge scenario (merge == natural_merge),
                // there might still be breaks which can reach the switch merge block.
                // This can happen if a switch block is in an if() {} block, and
                // one of the case labels branches to the else() block. Both the switch and else() block
                // reconvene later, which means that we should hoist the break so it's not contained
                // in switch scope.
                for (auto *frontier : node->dominance_frontier)
                {
                    if (frontier->forward_post_visit_order != merge->forward_post_visit_order &&
                        query_reachability(*frontier, *merge))
                    {
                        // Uncertain if we can deal with this.
                        // Multiple nested branches perhaps?
                        if (dominance_frontier_candidate)
                            LOGW("Multiple candidates for switch break transposition.\n");
                        dominance_frontier_candidate = frontier;
                    }
                }

                if (dominance_frontier_candidate)
                    hoist_switch_branches_to_frontier(node, merge, dominance_frontier_candidate);
            }

            bool can_merge_to_post_dominator = merge && node->dominates(merge) && merge->headers.empty();

            // Need to guarantee that we can merge somewhere.
            // If possible we want to make it so that by creating a ladder,
            // we change the post-dominator to something we dominate.
            // For this to work, the dominance frontier of node must only contain the merge node.
            if (merge != natural_merge && !can_merge_to_post_dominator &&
                node->dominance_frontier.size() == 1 && node->dominance_frontier.front() == merge)
            {
                merge = create_switch_merge_ladder(node, merge);
                assert(node->dominates(merge));
                modified_cfg = true;
                can_merge_to_post_dominator = true;
            }

            // Need to rewrite the switch if we're not already a loop header.
            if (merge != natural_merge && can_merge_to_post_dominator && !node->pred_back_edge)
            {
                auto *switch_outer = create_helper_pred_block(node);
                switch_outer->merge = MergeType::Loop;
                switch_outer->loop_merge_block = merge;
                switch_outer->freeze_structured_analysis = true;
                merge->headers.push_back(switch_outer);

                // Shouldn't be needed (I believe), but spirv-val is a bit temperamental when double breaking
                // straight out of a switch block in some situations,
                // so try not to ruffle too many feathers.
                if (std::find(node->succ.begin(), node->succ.end(), natural_merge) != node->succ.end())
                {
                    auto *dummy_case = pool.create_node();
                    dummy_case->name = natural_merge->name + ".pred";
                    dummy_case->immediate_dominator = node;
                    dummy_case->immediate_post_dominator = natural_merge;
                    dummy_case->forward_post_visit_order = node->forward_post_visit_order;
                    dummy_case->backward_post_visit_order = node->backward_post_visit_order;
                    dummy_case->ir.terminator.type = Terminator::Type::Branch;
                    dummy_case->ir.terminator.direct_block = natural_merge;
                    dummy_case->add_branch(natural_merge);
                    node->retarget_branch(natural_merge, dummy_case);
                }

                node->freeze_structured_analysis = true;
            }

            // Switch case labels must be contained within the switch statement.
            // Use a dummy label if we have to.
            auto succs = node->succ;
            for (auto *succ : succs)
            {
                bool need_fixup = false;
                if (succ == merge)
                {
                    if (merge != natural_merge)
                    {
                        // If we used outer shell method, we dominate merge,
                        // but not structurally, since there's a loop merge already.
                        need_fixup = can_merge_to_post_dominator;
                    }
                    else
                    {
                        // If this happens we are our own outer shell.
                        // The node itself is both a loop header *and* switch header,
                        // so similar analysis applies.
                        // Only consider fixup if we cannot reach continue block.
                        // This can still be a normal inner merge for the switch, which then branches to continue block.
                        need_fixup = node->pred_back_edge != nullptr &&
                                     !query_reachability(*succ, *node->pred_back_edge);
                    }
                }
                else
                {
                    // If we don't dominate succ, but it's not the common merge block, this is
                    // an edge case we have to handle as well.
                    // We might dominate a continue block, but these actually belong to outer loop scope.
                    need_fixup = !node->dominates(succ) || succ->succ_back_edge;
                }

                // Guard against duplicate label branches.
                bool has_succ = std::find(node->succ.begin(), node->succ.end(), succ) != node->succ.end();

                if (need_fixup && has_succ)
                {
                    auto *dummy_break = pool.create_node();
                    dummy_break->name = node->name + (succ->succ_back_edge ? ".continue" : ".break");
                    dummy_break->immediate_dominator = node;
                    dummy_break->immediate_post_dominator = succ;
                    dummy_break->forward_post_visit_order = node->forward_post_visit_order;
                    dummy_break->backward_post_visit_order = node->backward_post_visit_order;
                    dummy_break->ir.terminator.type = Terminator::Type::Branch;
                    dummy_break->ir.terminator.direct_block = succ;
                    dummy_break->is_pseudo_back_edge = succ->succ_back_edge != nullptr;
                    dummy_break->add_branch(succ);
                    node->retarget_branch(succ, dummy_break);
                }
            }
        }

        merge = natural_merge;
        CFGNode *merge_ladder = nullptr;

        // We cannot rewrite the CFG in pass 1 safely, this should have happened in pass 0.
        if (pass == 0 && (!node->dominates(merge) || block_is_plain_continue(merge)))
        {
            merge_ladder = create_switch_merge_ladder(node, merge);
            merge = find_common_post_dominator(node->succ);

            // If there are early-exits, the pdom may be nullptr. Safeguard against this.
            // This only seems to happen in dxbc2dxil.
            if (!merge)
                merge = merge_ladder;
            modified_cfg = true;
        }

        if (node->dominates(merge))
        {
            //LOGI("Switch merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(node), node->name.c_str(),
            //     static_cast<const void *>(merge), merge->name.c_str());
            node->merge = MergeType::Selection;
            // There is a small chance that this is supposed to be a loop merge target.
            // We'll fix that up later if needed. In that case, the switch block will merge to unreachable.
            node->selection_merge_block = merge;
            merge->add_unique_header(node);
        }
        else
        {
            // We got a switch block where someone is escaping. Similar idea as for loop analysis.
            // Find a post-dominator where we ignore branches which are "escaping".
            auto *dominated_merge_target = find_common_post_dominator_with_ignored_break(node->succ, merge);
            if (!dominated_merge_target)
            {
                LOGW("No dominated merge target found. Likely a bug. Falling back to merge ladder.\n");
                dominated_merge_target = merge_ladder;
            }

            assert(dominated_merge_target);
            if (node->dominates(dominated_merge_target))
            {
                node->merge = MergeType::Selection;
                node->selection_merge_block = merge;
                dominated_merge_target->add_unique_header(node);
                merge->add_unique_header(node);
            }
        }

        // A switch header might also be a loop header. Create a helper succ block for this case.
        if (pass == 0 && node->pred_back_edge)
        {
            node = create_helper_succ_block(node);
            modified_cfg = true;
        }
    }

    return modified_cfg ? SwitchProgressMode::SimpleModify : SwitchProgressMode::Done;
}

bool CFGStructurizer::merge_candidate_is_inside_continue_construct(const CFGNode *node) const
{
    // If we've reached the continue construct, we cannot merge away from that construct.
    // Any such merge must be eliminated.
    // We can know this for certain if the succ of node
    // post dominates the entire loop construct, since that node is the obvious merge node.
    assert(node->succ.size() == 1);
    for (auto *pred : node->pred)
    {
        if (pred->succ_back_edge &&
            node->succ.front()->post_dominates(pred->succ_back_edge) &&
            pred->succ_back_edge->dominates(node->succ.front()) &&
            !pred->dominates(node))
        {
            return true;
        }
    }

    return false;
}

bool CFGStructurizer::merge_candidate_is_on_breaking_path(const CFGNode *node) const
{
    return node->pred.size() >= 2 && node->succ.size() == 1 &&
           !node->dominates(node->succ.front()) &&
           node->succ.front()->post_dominates(node) &&
           control_flow_is_escaping(node, node->succ.front()) &&
           !node->post_dominates_perfect_structured_construct();
}

void CFGStructurizer::find_selection_merges(unsigned pass)
{
    for (auto *node : forward_post_visit_order)
    {
        if (node->num_forward_preds() <= 1)
            continue;

        // Never merge to continue block.
        // We should never hit this path unless we explicitly
        // avoided creating a continue ladder block earlier.
        if (block_is_plain_continue(node))
            continue;

        // If there are 2 or more pred edges, try to merge execution.
        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        // Check for case fallthrough here. In this case, we do not have a merge scenario, just ignore.
        auto *inner_header = node->get_outer_selection_dominator();
        if (inner_header && inner_header->ir.terminator.type == Terminator::Type::Switch)
        {
            if (inner_header->selection_merge_block == node)
            {
                // We just found a switch block which we have already handled.
                continue;
            }

            if (std::find(inner_header->succ.begin(), inner_header->succ.end(), node) != inner_header->succ.end())
            {
                // Fallthrough.
                continue;
            }
        }

        for (auto *header : node->headers)
        {
            // If we have a loop header already associated with this block, treat that as our idom.
            if (header->forward_post_visit_order > idom->forward_post_visit_order)
                idom = header;
        }

        // Similar, but also check if we have associated ladder blocks with the idom.
        if (!idom->pred_back_edge)
        {
            auto *inner_loop_header = get_innermost_loop_header_for(idom);
            if (inner_loop_header && inner_loop_header->loop_ladder_block == node)
                idom = const_cast<CFGNode *>(inner_loop_header);
        }

        if (idom->merge == MergeType::None || idom->merge == MergeType::Selection)
        {
            // We just found a switch block which we have already handled.
            if (idom->ir.terminator.type == Terminator::Type::Switch)
                continue;

            // If the idom is already a selection construct, this must mean
            // we have some form of breaking construct inside this inner construct.
            // This fooled find_selection_merges() into thinking we had a selection merge target at the break target.
            // Fix this up here, where we rewrite the outer construct as a fixed loop instead.
            if (idom->merge == MergeType::Selection)
            {
                if (pass == 0)
                {
                    assert(idom->selection_merge_block);
                    // If we turn the outer selection construct into a loop,
                    // we remove the possibility to break further out (without adding ladders like we do for loops).
                    // To make this work, we must ensure that the new merge block post-dominates the loop and selection merge.
                    auto *merge_candidate = CFGNode::find_common_post_dominator(idom->selection_merge_block, idom);
                    if (!merge_candidate || merge_candidate == idom->selection_merge_block)
                    {
                        idom->loop_merge_block = idom->selection_merge_block;
                    }
                    else
                    {
                        // Make sure we split merge scopes. Pretend we have a true loop.
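                        // The old selection merge block becomes the ladder,
                        // and the common post-dominator becomes the actual loop merge.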
                        idom->loop_ladder_block = idom->selection_merge_block;
                        idom->loop_merge_block = merge_candidate;
                    }

                    idom->loop_merge_block->add_unique_header(idom);
                    idom->merge = MergeType::Loop;
                    idom->selection_merge_block = nullptr;
                    idom->freeze_structured_analysis = true;
                    idom = create_helper_succ_block(idom);
                }
                else
                    LOGW("Mismatch headers in pass 1 ... ?\n");
            }

            // If we're in pass 1 and opt for a selection merge, we had better make sure that we can
            // actually use this as a merge block.
            // If we have more than 2 preds, there is no way this is not a break block merge.
            // It is not a switch statement and selections spawn two new scopes.
            // We should have resolved this in pass 0, but it can slip through the cracks if there
            // are multiple interleaving merge scopes in play.
            bool force_loop = pass == 1 && node->num_forward_preds() > 2 && idom->merge == MergeType::None;

            if (force_loop)
            {
                idom->merge = MergeType::Loop;
                node->add_unique_header(idom);
                idom->loop_merge_block = node;
                idom->freeze_structured_analysis = true;
            }
            else
            {
                idom->merge = MergeType::Selection;
                node->add_unique_header(idom);
                assert(node);
                idom->selection_merge_block = node;
                //LOGI("Selection merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(idom), idom->name.c_str(),
                //     static_cast<const void *>(node), node->name.c_str());
            }
        }
        else if (idom->merge == MergeType::Loop)
        {
            if (pass == 0)
            {
                if (idom->loop_merge_block == node && idom->loop_ladder_block)
                {
                    // We need to create an outer shell for this header since we need to ladder break to this node.
                    auto *loop = create_helper_pred_block(idom);
                    loop->merge = MergeType::Loop;
                    loop->loop_merge_block = node;
                    loop->freeze_structured_analysis = true;
                    node->add_unique_header(loop);
                    //LOGI("Loop merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(loop), loop->name.c_str(),
                    //     static_cast<const void *>(node), node->name.c_str());
                }
                else if (idom->loop_merge_block != node && idom->loop_ladder_block != node)
                {
                    auto *selection_idom = create_helper_succ_block(idom);
                    // If we split the loop header into the loop header -> selection merge header,
                    // then we can merge into a continue block for example.
                    selection_idom->merge = MergeType::Selection;
                    selection_idom->selection_merge_block = node;
                    node->add_unique_header(selection_idom);
                    //LOGI("Selection merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(selection_idom),
                    //     selection_idom->name.c_str(), static_cast<const void *>(node), node->name.c_str());
                }
            }
        }
        else
        {
            // We are hosed. There is no obvious way to merge execution here.
            // This might be okay.
            LOGW("Cannot merge execution for node %p (%s).\n", static_cast<const void *>(node), node->name.c_str());
        }
    }
}

const CFGNode *CFGStructurizer::get_innermost_loop_header_for(const CFGNode *header, const CFGNode *other) const
{
    auto *node = other;
    while (header != other)
    {
        // Entry block case.
        if (other->pred.empty())
            break;

        // Found a loop header. This better be the one.
        // Detect false positive if back-edge can reach the node, this means we just skip over
        // the loop. We want to detect loops in a structured sense.
        // Breaking constructs should still detect the loop header as we'd expect.
        if (other->pred_back_edge &&
            (other->pred_back_edge == node || !query_reachability(*other->pred_back_edge, *node)))
            break;

        assert(other->immediate_dominator);
        other = other->immediate_dominator;
    }

    return other;
}

const CFGNode *CFGStructurizer::get_innermost_loop_header_for(const CFGNode *other) const
{
    return get_innermost_loop_header_for(entry_block, other);
}

bool CFGStructurizer::loop_exit_supports_infinite_loop(const CFGNode *header, const CFGNode *loop_exit) const
{
    auto *inner_header = get_innermost_loop_header_for(header, loop_exit);

    // A loop exit can exit out to an outer scope such that inner_header dominates the header.
    // If there is no inner loop we can transform the loop exit into a merge block quite easily
    // and avoid the infinite loop.
    if (inner_header->dominates(header))
        return false;

    // We have a candidate. If the candidate dominates all reachable exits, there is never a need to merge later.
    return loop_exit->dominates_all_reachable_exits();
}

CFGStructurizer::LoopExitType CFGStructurizer::get_loop_exit_type(const CFGNode &header, const CFGNode &node) const
{
    // If there exists an inner loop which dominates this exit, we treat it as an inner loop exit.
    const CFGNode *innermost_loop_header = get_innermost_loop_header_for(&header, &node);
    bool is_innermost_loop_header = &header == innermost_loop_header;

    // If a back-edge can reach this node, it's not really an exit, but an Escape.
    // Exits must never branch "out" of the loop.
    if (header.dominates(&node) &&
        (!header.pred_back_edge || !query_reachability(*header.pred_back_edge, node)) &&
        node.dominates_all_reachable_exits())
    {
        if (is_innermost_loop_header)
            return LoopExitType::Exit;
        else
            return LoopExitType::InnerLoopExit;
    }

    if (header.dominates(&node))
    {
        if (is_innermost_loop_header)
        {
            // Even if we dominate node, we might not be able to merge to it.
            if (!header.can_loop_merge_to(&node))
            {
                // This is an escape we dominate, but this could also be a case where we break
                // to a continue construct in the outer loop which is not reachable through back traversal.
                // This will confuse loop analysis, since this kind of double continue will not resolve properly.
                // In this case we need to rendezvous at this block with a ladder to avoid
                // double-continue.
                auto *outer_infinite_loop =
                    get_innermost_loop_header_for(entry_block, innermost_loop_header->immediate_dominator);
                if (outer_infinite_loop && outer_infinite_loop->pred_back_edge &&
                    outer_infinite_loop->pred_back_edge->succ.empty() &&
                    outer_infinite_loop->pred_back_edge->post_dominates(&node))
                {
                    return LoopExitType::MergeToInfiniteLoop;
                }
                else
                    return LoopExitType::Escape;
            }

            return LoopExitType::Merge;
        }
        else
        {
            // Try to detect if this is a degenerate inner loop merge.
            // If the inner loop header is the only way to exit the loop construct,
            // the loop exit block is a false exit.
            // This is the case if the candidate must pass through the back edge, and the back edge can only branch to header.
            // In this case, the loop will not be visible through back-propagation, but it is definitely part of the loop construct.
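            // Only a plain Branch back edge guarantees the continue block cannot break on its own,
            // which is what makes the false-positive test below (post-dominator == back edge) meaningful.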
            if (!innermost_loop_header->pred_back_edge ||
                innermost_loop_header->pred_back_edge->ir.terminator.type != Terminator::Type::Branch)
                return LoopExitType::InnerLoopMerge;

            auto *post = find_common_post_dominator({ const_cast<CFGNode *>(&node),
                                                      innermost_loop_header->pred_back_edge });
            if (post == innermost_loop_header->pred_back_edge)
                return LoopExitType::InnerLoopFalsePositive;
            else
                return LoopExitType::InnerLoopMerge;
        }
    }
    else
        return LoopExitType::Escape;
}

CFGNode *CFGStructurizer::create_helper_pred_block(CFGNode *node)
{
    auto *pred_node = pool.create_node();
    pred_node->name = node->name + ".pred";

    // Fixup visit order later.
    pred_node->forward_post_visit_order = node->forward_post_visit_order;
    pred_node->backward_post_visit_order = node->backward_post_visit_order;

    std::swap(pred_node->pred, node->pred);

    for (auto *header : node->headers)
        header->fixup_merge_info_after_branch_rewrite(node, pred_node);
    node->headers.clear();

    // We're replacing entry block.
    if (node == node->immediate_dominator)
        pred_node->immediate_dominator = pred_node;
    else
        pred_node->immediate_dominator = node->immediate_dominator;

    pred_node->immediate_post_dominator = node;
    node->immediate_dominator = pred_node;

    retarget_pred_from(pred_node, node);

    pred_node->add_branch(node);

    if (node == entry_block)
        entry_block = pred_node;

    pred_node->ir.terminator.type = Terminator::Type::Branch;
    pred_node->ir.terminator.direct_block = node;

    return pred_node;
}

void CFGStructurizer::retarget_pred_from(CFGNode *new_node, CFGNode *old_succ)
{
    for (auto *p : new_node->pred)
    {
        for (auto &s : p->succ)
            if (s == old_succ)
                s = new_node;

        auto &p_term = p->ir.terminator;
        if (p_term.direct_block == old_succ)
            p_term.direct_block = new_node;
        if (p_term.true_block == old_succ)
            p_term.true_block = new_node;
        if (p_term.false_block == old_succ)
            p_term.false_block = new_node;

        for (auto &c : p_term.cases)
            if (c.node == old_succ)
                c.node = new_node;
    }

    // Do not swap back edges.

    // Retarget immediate post dominators.
    for (auto *n : forward_post_visit_order)
        if (n->immediate_post_dominator == old_succ)
            n->immediate_post_dominator = new_node;
}

void CFGStructurizer::retarget_succ_from(CFGNode *new_node, CFGNode *old_pred)
{
    for (auto *s : new_node->succ)
        for (auto &p : s->pred)
            if (p == old_pred)
                p = new_node;

    for (auto *node : forward_post_visit_order)
    {
        if (node != old_pred)
        {
            // Don't override immediate dominator for entry block.
            if (node->immediate_dominator == old_pred)
                node->immediate_dominator = new_node;
        }
    }

    new_node->immediate_dominator = old_pred;

    // Do not swap back edges.
}

CFGNode *CFGStructurizer::create_helper_succ_block(CFGNode *node)
{
    auto *succ_node = pool.create_node();
    succ_node->name = node->name + ".succ";

    // Fixup visit order later.
    succ_node->forward_post_visit_order = node->forward_post_visit_order;
    succ_node->backward_post_visit_order = node->backward_post_visit_order;

    std::swap(succ_node->succ, node->succ);
    // Do not swap back edges, only forward edges.

    succ_node->immediate_post_dominator = node->immediate_post_dominator;
    node->immediate_post_dominator = succ_node;

    succ_node->ir.terminator = node->ir.terminator;
    node->ir.terminator.type = Terminator::Type::Branch;
    node->ir.terminator.direct_block = succ_node;

    // Inherit selection construct from parent since we're taking over any selection.
    if (succ_node->ir.terminator.type == Terminator::Type::Condition)
        succ_node->ir.merge_info.selection_control_mask = node->ir.merge_info.selection_control_mask;

    retarget_succ_from(succ_node, node);
    node->add_branch(succ_node);
    return succ_node;
}

CFGNode *CFGStructurizer::find_common_post_dominator(const Vector<CFGNode *> &candidates)
{
    if (candidates.empty())
        return nullptr;
    else if (candidates.size() == 1)
        return candidates.front();

    CFGNode *common_post = CFGNode::find_common_post_dominator(candidates[0], candidates[1]);
    for (size_t i = 2; i < candidates.size(); i++)
        common_post = CFGNode::find_common_post_dominator(common_post, candidates[i]);

    return common_post != common_post->immediate_post_dominator ? common_post : nullptr;
}

CFGNode *CFGStructurizer::find_break_target_for_selection_construct(CFGNode *idom, CFGNode *merge)
{
    Vector<CFGNode *> new_visit_queue;
    UnorderedSet<CFGNode *> visited;
    Vector<CFGNode *> visit_queue;
    Vector<CFGNode *> candidates;
    visit_queue.push_back(idom);

    do
    {
        for (auto *n : visit_queue)
        {
            if (visited.count(n))
                continue;
            visited.insert(n);

            if (query_reachability(*merge, *n))
                continue;

            if (query_reachability(*n, *merge))
            {
                for (auto *succ : n->succ)
                    new_visit_queue.push_back(succ);
            }
            else
            {
                // Cannot merge into a loop construct.
                // Merging towards an outer loop construct would probably lead to weird results,
                // but allow it here.
                auto *inner = get_innermost_loop_header_for(n);
                if (inner != entry_block && query_reachability(*idom, *inner))
                    continue;

                // The breaking path might be vestigial.
                // I.e., it might just be exiting directly without dominating anything.
                // Have to detect this false positive, since it's not really a break, just early return.
                // If we hit a dominance frontier, allow it as a candidate since it cannot be early return within
                // the construct.
                if (!n->dominates_all_reachable_exits() || !idom->dominates(n))
                    candidates.push_back(n);
            }
        }

        visit_queue = new_visit_queue;
        new_visit_queue.clear();
    } while (!visit_queue.empty());

    if (candidates.empty())
        return nullptr;
    else
        return find_common_post_dominator(candidates);
}

CFGNode *CFGStructurizer::find_common_post_dominator_with_ignored_break(Vector<CFGNode *> candidates,
                                                                        const CFGNode *ignored_node)
{
    if (candidates.empty())
        return nullptr;

    Vector<CFGNode *> next_nodes;
    const auto add_unique_next_node = [&](CFGNode *node) {
        if (node != ignored_node)
            if (std::find(next_nodes.begin(), next_nodes.end(), node) == next_nodes.end())
                next_nodes.push_back(node);
    };

    while (candidates.size() != 1)
    {
        // Sort candidates by post visit order.
        std::sort(candidates.begin(), candidates.end(),
                  [](const CFGNode *a, const CFGNode *b) {
                      return a->forward_post_visit_order > b->forward_post_visit_order;
                  });

        // We reached exit without merging execution, there is no common post dominator.
        // A continue block which only branches back to header is conveniently ignored here.
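        // After the descending sort, candidates.front() has the largest forward post-visit order.
        // Replace it with its successors and iterate until a single candidate remains.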
        if (candidates.front()->succ.empty() && !candidates.front()->succ_back_edge)
            return nullptr;

        for (auto *succ : candidates.front()->succ)
            add_unique_next_node(succ);
        for (auto itr = candidates.begin() + 1; itr != candidates.end(); ++itr)
            add_unique_next_node(*itr);

        candidates.clear();
        std::swap(candidates, next_nodes);
    }

    if (candidates.empty())
        return nullptr;

    return candidates.front();
}

void CFGStructurizer::rewrite_ladder_conditional_branch_from_incoming_blocks(
    CFGNode *ladder, CFGNode *true_block, CFGNode *false_block,
    const std::function<bool(const CFGNode *)> &path_cb, const String &name)
{
    ladder->add_branch(true_block);
    ladder->add_branch(false_block);
    ladder->ir.terminator.type = Terminator::Type::Condition;
    ladder->ir.terminator.conditional_id = module.allocate_id();
    ladder->ir.terminator.true_block = true_block;
    ladder->ir.terminator.false_block = false_block;
    ladder->ir.terminator.direct_block = nullptr;

    PHI phi;
    phi.id = ladder->ir.terminator.conditional_id;
    phi.type_id = module.get_builder().makeBoolType();
    module.get_builder().addName(phi.id, name.c_str());

    for (auto *pred : ladder->pred)
    {
        IncomingValue incoming = {};
        incoming.block = pred;
        incoming.id = module.get_builder().makeBoolConstant(path_cb(pred));
        phi.incoming.push_back(incoming);
    }

    ladder->ir.phi.push_back(std::move(phi));
}

CFGNode *CFGStructurizer::transpose_code_path_through_ladder_block(CFGNode *header, CFGNode *merge, CFGNode *path)
{
    assert(header->dominates(merge) && header->dominates(path));
    assert(query_reachability(*merge, *path));
    assert(!merge->dominates(path));
    assert(header != merge);
    assert(merge != path);
    assert(header != path);

    // Rewrite the merge block into merge.pred where merge.pred will branch to either merge or path.
    auto *ladder = create_ladder_block(header, merge, ".transpose");

    UnorderedSet<const CFGNode *> normal_preds;
    for (auto *p : ladder->pred)
        normal_preds.insert(p);

    traverse_dominated_blocks_and_rewrite_branch(header, path, ladder);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        ladder, path, merge,
        [&](const CFGNode *n) { return normal_preds.count(n) == 0; },
        String("transpose_ladder_phi_") + ladder->name);

    return ladder;
}

void CFGStructurizer::rewrite_transposed_loop_outer(CFGNode *node, CFGNode *impossible_merge_target,
                                                    const LoopMergeAnalysis &analysis)
{
    auto impossible_preds = impossible_merge_target->pred;
    auto *replaced_merge_block = create_helper_pred_block(analysis.dominated_merge);
    replaced_merge_block->name = analysis.dominated_merge->name + ".transposed-merge-outer";

    for (auto *pred : impossible_preds)
        if (!query_reachability(*analysis.dominated_merge, *pred))
            pred->retarget_branch(impossible_merge_target, replaced_merge_block);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        replaced_merge_block, impossible_merge_target, analysis.dominated_merge,
        [&](const CFGNode *n) {
            return std::find(impossible_preds.begin(), impossible_preds.end(), n) != impossible_preds.end();
        },
        String("transposed_selector_") + node->name);
}

void CFGStructurizer::rewrite_transposed_loop_inner(CFGNode *node, CFGNode *impossible_merge_target,
                                                    const LoopMergeAnalysis &analysis)
{
    // Rewrite the control flow from the inside out through a transposition.
    // The common break target will become the merge block instead.
    // The continue will break out to the transposed merge instead.
    // In the ladder, we will enter a breaking path which branches out to loop_ladder.
    // We just arbitrarily call this "inner", since I don't think it has a formal name.
    // In this case, dominated merge cannot reach impossible merge target.
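    // Two ladder nodes are created below: a selection ladder which takes over the preds of
    // dominated_merge, and a break ladder which forwards the breaking path to the impossible target.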
    auto *merge = analysis.merge;
    auto *dominated_merge = analysis.dominated_merge;

    auto *ladder_break = pool.create_node();
    ladder_break->name = node->name + ".transposed-merge-inner.break";
    ladder_break->ir.terminator.type = Terminator::Type::Branch;
    ladder_break->ir.terminator.direct_block = impossible_merge_target;
    ladder_break->immediate_post_dominator = impossible_merge_target;
    ladder_break->forward_post_visit_order = impossible_merge_target->forward_post_visit_order;
    ladder_break->backward_post_visit_order = impossible_merge_target->backward_post_visit_order;

    auto *ladder_selection = pool.create_node();
    ladder_selection->name = node->name + ".transposed-merge-inner";
    ladder_selection->forward_post_visit_order = impossible_merge_target->forward_post_visit_order;
    ladder_selection->backward_post_visit_order = impossible_merge_target->backward_post_visit_order;
    ladder_selection->immediate_post_dominator = merge;
    ladder_break->immediate_dominator = ladder_selection;

    auto ladder_preds = dominated_merge->pred;

    ladder_selection->add_branch(ladder_break);
    ladder_selection->add_branch(dominated_merge);
    traverse_dominated_blocks_and_rewrite_branch(node, impossible_merge_target, ladder_selection);
    ladder_selection->recompute_immediate_dominator();
    ladder_break->add_branch(impossible_merge_target);

    // Branches from these blocks should be rewritten to target transposed-merge.
    for (auto *ladder_pred : ladder_preds)
        ladder_pred->retarget_branch(dominated_merge, ladder_selection);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        ladder_selection, dominated_merge, ladder_break,
        [&](const CFGNode *n) {
            return std::find(ladder_preds.begin(), ladder_preds.end(), n) != ladder_preds.end();
        },
        String("transposed_selector_") + node->name);
}

bool CFGStructurizer::rewrite_transposed_loops()
{
    bool did_rewrite = false;

    for (auto index = forward_post_visit_order.size(); index && !did_rewrite; index--)
    {
        // Visit in reverse order so we resolve outer loops first,
        // this lets us detect ladder-breaking loops.
        auto *node = forward_post_visit_order[index - 1];
        if (node->freeze_structured_analysis && node->merge == MergeType::Loop)
            continue;
        if (!node->has_pred_back_edges())
            continue;

        auto result = analyze_loop(node);
        auto merge_result = analyze_loop_merge(node, result);
        auto *merge = merge_result.merge;
        auto *dominated_merge = merge_result.dominated_merge;

        if (!merge || !dominated_merge)
            continue;

        // We might have a horribly complex scenario where a loop breaks, but it breaks to an outer scope
        // which is not consistent with the merge block, i.e. we need structured control flow to resolve properly
        // before we can break. This is ... problematic.
        // We call this an "inner" transposed loop here since merge block cannot reach this block.

        // Always resolve infinite continue ladders. This is where we break to
        // an outer infinite loop. We must resolve the scopes by making this ladder the
        // merge point, then we can break further.
        CFGNode *impossible_merge_target = merge_result.infinite_continue_ladder;

        if (!impossible_merge_target && !result.non_dominated_exit.empty())
        {
            auto *common_break_target = find_common_post_dominator(result.non_dominated_exit);
            if (common_break_target && common_break_target != merge &&
                !query_reachability(*dominated_merge, *common_break_target) &&
                !query_reachability(*common_break_target, *dominated_merge))
            {
                // Another weird scenario is where we dominate the outer continue,
                // which would escape the DF analysis, but that is strong evidence we need to transpose.
                // A normal break would never dominate anything like that.
                if (common_break_target->reaches_domination_frontier_before_merge(merge) ||
                    common_break_target->dominates_outer_continue(node))
                {
                    impossible_merge_target = common_break_target;
                }
            }
        }

        if (!impossible_merge_target)
        {
            // We might have a different scenario where there are multiple breaks, but they break out to different
            // scopes. One of these might require a similar impossible merge.
            // Common post dominator analysis would not catch this.
            // What we're looking for is a node which:
            // - Is dominated by loop header (or is in the domination frontier of loop header)
            // - Is reachable, but not dominated by dominated_merge.
            // - Post dominates one of the non_dominated_exits.
            // This means the node is in a twilight zone where the node is kinda in the loop construct, but kinda not.
            // Structured rules for a loop state that a node is in the construct if:
            // - It is dominated by loop header
            // - Not dominated by merge block.
            // In a sense, the merge block ends up branching back into its own loop, which is irreducible, kinda ...
            // We call this an "outer" transposed loop here since merge block *can* reach this block.
            for (size_t i = 0, n = result.non_dominated_exit.size(); i < n && !impossible_merge_target; i++)
            {
                auto *candidate = result.non_dominated_exit[i];
                while (candidate != merge && candidate != dominated_merge)
                {
                    if (query_reachability(*dominated_merge, *candidate) && !dominated_merge->dominates(candidate))
                    {
                        // Merge block attempts to branch back into its own loop construct (yikes).
                        impossible_merge_target = candidate;

                        // If we don't dominate the merge target, i.e. we're in the domination frontier,
                        // we have to synthesize a fake impossible merge target first since the rewrite
                        // algorithm depends on node dominating the merge target.
                        if (!node->dominates(impossible_merge_target))
                            impossible_merge_target = create_ladder_block(node, impossible_merge_target, ".impossible-ladder");
                        break;
                    }
                    else if (node->dominates(candidate) && candidate != candidate->immediate_post_dominator)
                    {
                        candidate = candidate->immediate_post_dominator;
                    }
                    else
                    {
                        // We will be able to select a candidate in the domination frontier once.
                        // If we failed to find a candidate in the domination frontier, we're done checking.
                        break;
                    }
                }
            }
        }

        if (impossible_merge_target)
        {
            if (query_reachability(*dominated_merge, *impossible_merge_target))
                rewrite_transposed_loop_outer(node, impossible_merge_target, merge_result);
            else
                rewrite_transposed_loop_inner(node, impossible_merge_target, merge_result);

            // We have obliterated the existing control flow through transposition,
            // and any domination or post-domination analysis will break.
            // Re-traverse the CFG and try again.
            // Continue until we have eliminated all impossible loops (should be extremely rare).
            did_rewrite = true;
        }
        else if (!result.non_dominated_exit.empty() && dominated_merge->dominance_frontier.size() >= 2)
        {
            // If we cannot find the impossible merge target through post-domination analysis,
            // we might find it through domination frontier analysis.
            // If all loop exits and the loop header share a domination frontier,
            // it's probably our candidate.
            // Only apply this analysis to cases where a loop has at least two dominance frontiers,
            // which also don't have a dominance relationship with each other.
            // This is evidence that the loop is attempting to break to multiple different scopes.
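            // Sort a copy of the frontier so the pairwise dominance check below
            // sees a deterministic order.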
            auto frontier = dominated_merge->dominance_frontier;
            std::stable_sort(frontier.begin(), frontier.end(),
                             [](const CFGNode *a, const CFGNode *b) {
                                 return a->forward_post_visit_order > b->forward_post_visit_order;
                             });

            bool frontier_has_dominance_relationship = false;
            for (size_t i = 0, n = frontier.size(); i < n && !frontier_has_dominance_relationship; i++)
                for (size_t j = i + 1; j < n && !frontier_has_dominance_relationship; j++)
                    if (frontier[i]->dominates(frontier[j]))
                        frontier_has_dominance_relationship = true;

            if (frontier_has_dominance_relationship)
                continue;

            for (auto *candidate : dominated_merge->dominance_frontier)
            {
                bool all_frontier = std::find(node->dominance_frontier.begin(), node->dominance_frontier.end(),
                                              candidate) != node->dominance_frontier.end();

                if (all_frontier)
                {
                    for (auto *non_dominated : result.non_dominated_exit)
                    {
                        if (!node->dominates(non_dominated))
                        {
                            all_frontier = false;
                            break;
                        }

                        if (std::find(non_dominated->dominance_frontier.begin(),
                                      non_dominated->dominance_frontier.end(),
                                      candidate) == non_dominated->dominance_frontier.end())
                        {
                            all_frontier = false;
                            break;
                        }
                    }
                }

                if (all_frontier)
                {
                    if (!impossible_merge_target ||
                        candidate->forward_post_visit_order > impossible_merge_target->forward_post_visit_order)
                    {
                        impossible_merge_target = candidate;
                    }
                }
                else
                {
                    impossible_merge_target = nullptr;
                    break;
                }
            }

            if (impossible_merge_target)
            {
                auto constructs = result.non_dominated_exit;
                constructs.push_back(dominated_merge);
                collect_and_dispatch_control_flow(node, dominated_merge, constructs, false, false);
                did_rewrite = true;
            }
        }
    }

    if (did_rewrite)
        recompute_cfg();

    return did_rewrite;
}

CFGStructurizer::LoopAnalysis CFGStructurizer::analyze_loop(CFGNode *node) const
{
    LoopAnalysis result;

    // Now, we need to figure out which blocks belong in the loop construct.
    // A natural loop contains any block which is dominated by the loop header
    // and from which control flow passes to one of the back edges.
    // Unfortunately, it can be ambiguous which block is the merge block for a loop.
    // Ideally, there is a unique block which is the loop exit block, but if there are multiple breaks
    // there are multiple blocks which are not part of the loop construct.

    LoopBacktracer tracer;
    auto *pred = node->pred_back_edge;

    // Back-trace from here.
    // The CFG is reducible, so node must dominate pred.
    // Since node dominates pred, there is no pred chain we can follow without
    // eventually hitting node, and we'll stop traversal there.
    // All nodes which are touched during this traversal must be part of the loop construct.
    tracer.trace_to_parent(node, pred);

    LoopMergeTracer merge_tracer(tracer);
    merge_tracer.trace_from_parent(node);

    for (auto *loop_exit : merge_tracer.loop_exits)
    {
        auto exit_type = get_loop_exit_type(*node, *loop_exit);
        switch (exit_type)
        {
        case LoopExitType::Exit:
            result.direct_exits.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopExit:
            // It's not an exit for us, but the inner loop.
            result.inner_direct_exits.push_back(loop_exit);
            break;

        case LoopExitType::Merge:
            result.dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopMerge:
            result.inner_dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopFalsePositive:
            // In this case, the inner loop can only exit at the loop header,
            // and thus post-dominance analysis will always fail.
            // Ignore this case as it's a false exit.
            break;

        case LoopExitType::Escape:
            result.non_dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::MergeToInfiniteLoop:
            result.dominated_continue_exit.push_back(loop_exit);
            break;
        }
    }

    // A dominated continue exit should not be considered as such if it can reach other "normal" exits.
    // In this case, it's just a break.
    auto continue_itr = result.dominated_continue_exit.begin();
    while (continue_itr != result.dominated_continue_exit.end())
    {
        auto *candidate = *continue_itr;
        bool found_candidate = false;

        for (auto *dominated : result.dominated_exit)
        {
            if (query_reachability(*candidate, *dominated))
            {
                result.non_dominated_exit.push_back(candidate);
                continue_itr = result.dominated_continue_exit.erase(continue_itr);
                found_candidate = true;
                break;
            }
        }

        if (!found_candidate)
        {
            for (auto *non_dominated : result.non_dominated_exit)
            {
                if (query_reachability(*candidate, *non_dominated))
                {
                    result.non_dominated_exit.push_back(candidate);
                    continue_itr = result.dominated_continue_exit.erase(continue_itr);
                    found_candidate = true;
                    break;
                }
            }
        }

        if (!found_candidate)
            ++continue_itr;
    }

    if (result.dominated_continue_exit.size() > 1)
    {
        // If we have multiple continue exit candidates, they better merge into a single clean candidate that we
        // still dominate, otherwise, ignore this case and treat them all as normal Escape nodes.
        auto *common = find_common_post_dominator(result.dominated_continue_exit);
        if (common && node->dominates(common))
        {
            result.dominated_continue_exit.clear();
            result.dominated_continue_exit.push_back(common);
        }
        else
        {
            result.non_dominated_exit.insert(result.non_dominated_exit.end(),
                                             result.dominated_continue_exit.begin(),
                                             result.dominated_continue_exit.end());
            result.dominated_continue_exit.clear();
        }
    }

    // If the only merge candidates we have are inner dominated, treat them as true dominated exits.
    if (result.dominated_exit.empty() && !result.inner_dominated_exit.empty())
        std::swap(result.dominated_exit, result.inner_dominated_exit);

    // If there are no direct exits, treat inner direct exits as direct exits.
    if (result.direct_exits.empty())
        std::swap(result.direct_exits, result.inner_direct_exits);

    // A direct exit can be considered a dominated exit if there are no better candidates.
    if (result.dominated_exit.empty() && !result.direct_exits.empty())
        std::swap(result.dominated_exit, result.direct_exits);

    // If we only have one direct exit, consider it our merge block.
    // Pick either Merge or Escape.
    if (result.direct_exits.size() == 1 && result.dominated_exit.empty() && result.non_dominated_exit.empty())
    {
        if (node->dominates(result.direct_exits.front()))
            std::swap(result.dominated_exit, result.direct_exits);
        else
            std::swap(result.non_dominated_exit, result.direct_exits);
    }

    if (result.dominated_exit.size() >= 2)
    {
        // Try to see if we can reduce the number of merge blocks to just 1.
        // This is relevant if we have various "clean" break blocks.
        auto *post_dominator = find_common_post_dominator(result.dominated_exit);
        if (std::find(result.dominated_exit.begin(), result.dominated_exit.end(), post_dominator) !=
            result.dominated_exit.end())
        {
            result.dominated_exit.clear();
            result.dominated_exit.push_back(post_dominator);
        }
    }

    return result;
}

CFGStructurizer::LoopMergeAnalysis CFGStructurizer::analyze_loop_merge(CFGNode *node, const LoopAnalysis &analysis)
{
    // We have multiple blocks which are merge candidates. We need to figure out where execution reconvenes.
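    // "merge" is the common post-dominator of every exit candidate, while "dominated_merge" is the
    // ladder target which the loop header actually dominates; they differ when breaks escape the loop.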
	Vector<CFGNode *> merges;
	merges.reserve(analysis.inner_dominated_exit.size() + analysis.dominated_exit.size() +
	               analysis.non_dominated_exit.size());
	merges.insert(merges.end(), analysis.inner_dominated_exit.begin(), analysis.inner_dominated_exit.end());
	merges.insert(merges.end(), analysis.dominated_exit.begin(), analysis.dominated_exit.end());
	merges.insert(merges.end(), analysis.non_dominated_exit.begin(), analysis.non_dominated_exit.end());
	CFGNode *merge = CFGStructurizer::find_common_post_dominator(merges);

	CFGNode *dominated_merge = nullptr;

	// Try to find the sensible target first.
	// If one of our merge blocks is the successor of the continue block,
	// this is a prime candidate for a ladder block.
	if (node->pred_back_edge && node->pred_back_edge->succ.size() == 1 &&
	    std::find(analysis.dominated_exit.begin(), analysis.dominated_exit.end(),
	              node->pred_back_edge->succ.front()) != analysis.dominated_exit.end())
	{
		dominated_merge = node->pred_back_edge->succ.front();
	}
	else if (merge && !node->dominates(merge) && analysis.dominated_exit.size() > 1)
	{
		// Now, we might have Merge blocks which end up escaping out of the loop construct.
		// We might have to remove candidates which end up being break blocks after all.
		Vector<CFGNode *> non_breaking_exits;
		non_breaking_exits.reserve(analysis.dominated_exit.size());
		for (auto *exit : analysis.dominated_exit)
			if (!control_flow_is_escaping(exit, merge))
				non_breaking_exits.push_back(exit);

		if (!non_breaking_exits.empty())
			dominated_merge = CFGStructurizer::find_common_post_dominator(non_breaking_exits);

		if (!dominated_merge)
		{
			// If we get here, we likely have some questionable tie-break situation.
			// One possible case is an infinite loop where one path does a multi-level break,
			// and other paths branch to outer loop's continue. We'll want to only look at dominated exits
			// with the smallest break scope and try to find a common post dominator.
			auto *innermost_header = get_innermost_loop_header_for(node->immediate_dominator);
			Vector<CFGNode *> continue_exits;
			if (innermost_header && innermost_header->pred_back_edge)
				for (auto *exit : analysis.dominated_exit)
					if (query_reachability(*exit, *innermost_header->pred_back_edge))
						continue_exits.push_back(exit);

			if (!continue_exits.empty())
				dominated_merge = CFGStructurizer::find_common_post_dominator(continue_exits);
		}
	}
	else
	{
		dominated_merge = CFGStructurizer::find_common_post_dominator(analysis.dominated_exit);
	}

	if (!dominated_merge)
	{
		LOGW("There is no candidate for ladder merging.\n");
	}

	if (dominated_merge && !node->dominates(dominated_merge))
	{
		LOGW("We don't dominate the merge target ...\n");
		dominated_merge = nullptr;
	}

	LoopMergeAnalysis merge_result = {};
	merge_result.merge = merge;
	merge_result.weak_merge = merge;
	merge_result.dominated_merge = dominated_merge;

	if (!merge)
	{
		// Try to find a candidate merge point which ignores any early exits through common post domination frontier
		// analysis.
		Vector<CFGNode *> frontiers;
		for (auto *m : merges)
			frontiers.insert(frontiers.end(), m->dominance_frontier.begin(), m->dominance_frontier.end());

		// Find the innermost frontier that satisfies the requirements.
		std::stable_sort(frontiers.begin(), frontiers.end(), [](const CFGNode *a, const CFGNode *b) {
			return a->forward_post_visit_order > b->forward_post_visit_order;
		});
		frontiers.erase(std::unique(frontiers.begin(), frontiers.end()), frontiers.end());

		for (auto *front : frontiers)
		{
			// All merge nodes must reach the candidate for it to be considered a proper merge.
auto itr = std::find_if(merges.begin(), merges.end(), [&](const CFGNode *c) { return !query_reachability(*c, *front); }); if (itr == merges.end()) { merge_result.weak_merge = front; break; } } } if (!analysis.dominated_continue_exit.empty()) { assert(analysis.dominated_continue_exit.size() == 1); merge_result.infinite_continue_ladder = analysis.dominated_continue_exit.front(); } return merge_result; } void CFGStructurizer::collect_and_dispatch_control_flow_from_anchor( CFGNode *anchor, const Vector &constructs) { auto &builder = module.get_builder(); // If we have an anchor, it should collect all control flow, maybe dispatch itself, then dispatch to the constructs. // It must be a conditional branch, since it's too much of a mess to deal with switch. assert(anchor->ir.terminator.type == Terminator::Type::Condition); assert(constructs.size() == 2); assert(constructs[0]->post_dominates(anchor->ir.terminator.true_block) || constructs[0]->post_dominates(anchor->ir.terminator.false_block)); assert(constructs[1]->post_dominates(anchor->ir.terminator.true_block) || constructs[1]->post_dominates(anchor->ir.terminator.false_block)); auto *anchor_pred = create_helper_pred_block(anchor); auto *anchor_to_construct0 = pool.create_node(); auto *anchor_to_construct1 = pool.create_node(); auto *anchor_terminator = pool.create_node(); auto *anchor_dispatcher = pool.create_node(); anchor_to_construct0->name = anchor->name + ".anchor0"; anchor_to_construct1->name = anchor->name + ".anchor1"; anchor_to_construct0->immediate_dominator = anchor; anchor_to_construct1->immediate_dominator = anchor; anchor_to_construct0->immediate_post_dominator = constructs[0]; anchor_to_construct1->immediate_post_dominator = constructs[1]; anchor_to_construct0->forward_post_visit_order = constructs[0]->forward_post_visit_order; anchor_to_construct1->forward_post_visit_order = constructs[1]->forward_post_visit_order; anchor_to_construct0->backward_post_visit_order = constructs[0]->backward_post_visit_order; anchor_to_construct1->backward_post_visit_order = constructs[1]->backward_post_visit_order; anchor_to_construct0->add_branch(anchor_terminator); anchor_to_construct1->add_branch(anchor_terminator); anchor_to_construct0->ir.terminator.type = Terminator::Type::Branch; anchor_to_construct0->ir.terminator.direct_block = anchor_terminator; anchor_to_construct1->ir.terminator.type = Terminator::Type::Branch; anchor_to_construct1->ir.terminator.direct_block = anchor_terminator; anchor_terminator->name = anchor->name + ".anchor-term"; anchor_terminator->add_branch(anchor_dispatcher); anchor_terminator->ir.terminator.type = Terminator::Type::Branch; anchor_terminator->ir.terminator.direct_block = anchor_dispatcher; anchor_dispatcher->name = anchor->name + ".anchor-dispatch"; PHI terminator_selector; terminator_selector.id = module.allocate_id(); terminator_selector.type_id = builder.makeBoolType(); terminator_selector.incoming.push_back({ anchor_to_construct0, builder.makeBoolConstant(true) }); terminator_selector.incoming.push_back({ anchor_to_construct1, builder.makeBoolConstant(false) }); traverse_dominated_blocks_and_rewrite_branch(anchor, constructs[0], anchor_to_construct0); traverse_dominated_blocks_and_rewrite_branch(anchor, constructs[1], anchor_to_construct1); size_t cutoff_normal_path = anchor_pred->pred.size(); traverse_dominated_blocks_and_rewrite_branch(constructs[0]->immediate_dominator, constructs[0], anchor_pred); size_t cutoff_path0 = anchor_pred->pred.size(); 
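	// After the final rewrite just below, anchor_pred->pred is partitioned by construction:
	//   [0, cutoff_normal_path)            : preds which branched to anchor normally,
	//   [cutoff_normal_path, cutoff_path0) : rewritten branches which targeted constructs[0],
	//   [cutoff_path0, pred.size())        : rewritten branches which targeted constructs[1].
	// The boolean PHIs below pick their constants purely from these index ranges.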
traverse_dominated_blocks_and_rewrite_branch(constructs[1]->immediate_dominator, constructs[1], anchor_pred); assert(constructs[0]->pred.empty()); assert(constructs[1]->pred.empty()); // Branch to anchor as normal if we have a pre-existing pred. PHI take_anchor_phi; take_anchor_phi.id = module.allocate_id(); take_anchor_phi.type_id = builder.makeBoolType(); for (size_t i = 0; i < cutoff_normal_path; i++) take_anchor_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(true) }); for (size_t i = cutoff_normal_path; i < anchor_pred->pred.size(); i++) take_anchor_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(false) }); anchor_pred->add_branch(anchor); anchor_pred->add_branch(anchor_dispatcher); anchor_pred->ir.terminator.type = Terminator::Type::Condition; anchor_pred->ir.terminator.true_block = anchor; anchor_pred->ir.terminator.false_block = anchor_dispatcher; anchor_pred->ir.terminator.direct_block = nullptr; anchor_pred->ir.terminator.conditional_id = take_anchor_phi.id; PHI outside_true_phi; outside_true_phi.id = module.allocate_id(); outside_true_phi.type_id = builder.makeBoolType(); for (size_t i = 0; i < cutoff_path0; i++) outside_true_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(true) }); for (size_t i = cutoff_path0; i < anchor_pred->pred.size(); i++) outside_true_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(false) }); PHI anchor_cond_phi; anchor_cond_phi.id = module.allocate_id(); anchor_cond_phi.type_id = builder.makeBoolType(); // If we took the path through anchor, use that conditional. Otherwise, use the selector between path 0 or 1. anchor_cond_phi.incoming.push_back({ anchor, terminator_selector.id }); anchor_cond_phi.incoming.push_back({ anchor_pred, outside_true_phi.id }); anchor_pred->ir.phi.push_back(std::move(take_anchor_phi)); anchor_pred->ir.phi.push_back(std::move(outside_true_phi)); anchor_terminator->ir.phi.push_back(std::move(terminator_selector)); anchor_dispatcher->ir.terminator.conditional_id = anchor_cond_phi.id; anchor_dispatcher->ir.terminator.type = Terminator::Type::Condition; anchor_dispatcher->ir.terminator.true_block = constructs[0]; anchor_dispatcher->ir.terminator.false_block = constructs[1]; anchor_dispatcher->add_branch(constructs[0]); anchor_dispatcher->add_branch(constructs[1]); anchor_dispatcher->ir.phi.push_back(std::move(anchor_cond_phi)); } void CFGStructurizer::collect_and_dispatch_control_flow( CFGNode *common_idom, CFGNode *common_pdom, const Vector &constructs, bool collect_all_code_paths_to_pdom, bool allow_crossing_branches) { assert(constructs.size() >= 2); auto &builder = module.get_builder(); bool need_default_case = false; bool plain_branch = false; size_t cutoff_index = 0; CFGNode *dispatcher; // If there is no strict dominance relationship, it's too risky to freeze a loop here, // since we may have stray breaks that will invert merge ordering, and cause issues. // Freezing control flow is important for interleaved merge patterns where we don't want to explode // the control flow ladders all over the place. bool freeze_control_flow = !common_idom->pred_back_edge && common_pdom->post_dominates(common_idom); if (freeze_control_flow) { // Also check that there are no edges that leave the scope between common_idom // and common_pdom and don't freeze if so. 
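		// "Freezing" here means common_idom is later forced into MergeType::Loop with the new
		// dispatcher as its merge block (see the freeze_control_flow handling further down),
		// so we have to be conservative about when that is legal.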
// node->forward_post_visit_order should map 1:1 to the post-visit array, // but in extreme circumstances where there have been inline cfg rewrites before recompute, // this may not be true, so be defensive. auto itr = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), common_pdom); auto end = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), common_idom); assert(itr != forward_post_visit_order.end()); assert(end != forward_post_visit_order.end()); const auto can_reach_any_construct = [&](const CFGNode *succ) { for (auto *construct : constructs) if (query_reachability(*succ, *construct)) return true; return false; }; const auto any_succ_escapes_constructs = [&](const CFGNode *n) { // idom is not included in the loop below, but it can branch beyond all constructs. for (auto *succ : n->succ) if (!can_reach_any_construct(succ)) return true; return false; }; const auto is_construct = [&](const CFGNode *n) { return std::find(constructs.begin(), constructs.end(), n) != constructs.end(); }; if (!collect_all_code_paths_to_pdom) { // idom is not included in the loop below, but it can branch beyond all constructs. freeze_control_flow = !any_succ_escapes_constructs(common_idom); } for (; itr != end && freeze_control_flow; ++itr) { CFGNode *node = *itr; if (!common_idom->dominates(node)) continue; if (node->succ_back_edge != nullptr && node->succ_back_edge != common_idom && query_reachability(*node->succ_back_edge, *common_idom)) { // Branches backwards. freeze_control_flow = false; } else if (!collect_all_code_paths_to_pdom && !is_construct(node) && common_idom->dominates(node) && can_reach_any_construct(node) && any_succ_escapes_constructs(node)) { // If we're using the simple collector, we merge at the constructs instead. // Make absolutely sure this is safe to merge to be checking that the dispatch point would be // a suitable merge. freeze_control_flow = false; } } } PHI phi; phi.id = module.allocate_id(); if (collect_all_code_paths_to_pdom) { // In some merge scenarios, we need to make sure we encapsulate all code into this new dispatcher. // This will become our new merge block. Incoming impossible merges will be transposed to after this new merge. dispatcher = create_helper_pred_block(common_pdom); for (auto *candidate : constructs) traverse_dominated_blocks_and_rewrite_branch(candidate, dispatcher, common_pdom); cutoff_index = dispatcher->pred.size(); // If there is no direct branch intended for node, the default case label will never be reached, // so just pilfer one of the cases as default. need_default_case = !dispatcher->pred.empty(); plain_branch = !need_default_case && constructs.size() == 2; if (!plain_branch) { for (size_t i = 0; i < cutoff_index; i++) phi.incoming.push_back({ dispatcher->pred[i], builder.makeIntConstant(-1) }); phi.type_id = builder.makeIntType(32); } else { phi.type_id = builder.makeBoolType(); } } else { dispatcher = pool.create_node(); dispatcher->name = common_idom->name + ".collector"; dispatcher->immediate_dominator = common_idom; dispatcher->immediate_post_dominator = common_pdom; dispatcher->forward_post_visit_order = common_pdom->forward_post_visit_order; dispatcher->backward_post_visit_order = common_pdom->backward_post_visit_order; plain_branch = constructs.size() == 2; } phi.type_id = plain_branch ? 
builder.makeBoolType() : builder.makeIntType(32);

	for (size_t i = 0, n = constructs.size(); i < n; i++)
	{
		auto *candidate = constructs[i];

		if (allow_crossing_branches)
		{
			traverse_dominated_blocks_and_rewrite_branch(common_idom, candidate, dispatcher,
			                                             [](const CFGNode *) { return true; }, constructs);
		}
		else
		{
			traverse_dominated_blocks_and_rewrite_branch(common_idom, candidate, dispatcher);
		}

		size_t next_cutoff_index = dispatcher->pred.size();
		for (size_t j = cutoff_index; j < next_cutoff_index; j++)
		{
			spv::Id cond_id;
			if (plain_branch)
				cond_id = builder.makeBoolConstant(i != 0);
			else
				cond_id = builder.makeIntConstant(int32_t(i));
			phi.incoming.push_back({ dispatcher->pred[j], cond_id });
		}
		cutoff_index = next_cutoff_index;
	}

	if (freeze_control_flow)
	{
		common_idom->freeze_structured_analysis = true;
		common_idom->merge = MergeType::Loop;
		common_idom->loop_merge_block = dispatcher;
	}

	dispatcher->ir.terminator.conditional_id = phi.id;
	dispatcher->ir.phi.push_back(std::move(phi));
	builder.addName(phi.id, String("selector_" + common_pdom->name).c_str());
	dispatcher->ir.terminator.direct_block = nullptr;
	dispatcher->clear_branches();

	if (plain_branch)
	{
		dispatcher->ir.terminator.type = Terminator::Type::Condition;
		dispatcher->ir.terminator.false_block = constructs[0];
		dispatcher->ir.terminator.true_block = constructs[1];
		dispatcher->add_branch(constructs[0]);
		dispatcher->add_branch(constructs[1]);
	}
	else
	{
		dispatcher->ir.terminator.type = Terminator::Type::Switch;

		Terminator::Case default_case;
		default_case.node = need_default_case ? common_pdom : constructs[0];
		default_case.is_default = true;
		dispatcher->ir.terminator.cases.push_back(default_case);
		dispatcher->add_branch(default_case.node);

		for (size_t i = 0, n = constructs.size(); i < n; i++)
		{
			auto *candidate = constructs[i];
			assert(allow_crossing_branches || candidate->pred.empty() || candidate == default_case.node);
			dispatcher->add_branch(candidate);

			if (need_default_case || i)
			{
				Terminator::Case break_case;
				break_case.node = candidate;
				break_case.value = uint32_t(i);
				dispatcher->ir.terminator.cases.push_back(break_case);
			}
		}
	}
}

bool CFGStructurizer::rewrite_complex_loop_exits(CFGNode *node, CFGNode *merge, Vector<CFGNode *> &dominated_exits)
{
	if (!merge || !node->pred_back_edge->succ.empty() || dominated_exits.size() < 2)
		return false;

	// This heuristic is somewhat questionable. :')
	bool needs_early_explicit_ladder = false;
	CFGNode *common_idom = node;

	// Use a stricter definition when there is a clean merge candidate.
	if (node->can_loop_merge_to(merge))
	{
		// If all nodes share a frontier node which is not the target merge block, we have a spicy merge
		// that should be collected in a ladder first, since there is no natural ladder block
		// in this scenario. The shared frontier node is the more plausible true merge target,
		// and the outer merge was a red herring, but since we don't have a proper ladder block,
		// it will complicate things.
		Vector<CFGNode *> frontier_nodes;
		for (auto *n : dominated_exits)
		{
			frontier_nodes.insert(frontier_nodes.end(),
			                      n->dominance_frontier.begin(), n->dominance_frontier.end());
		}

		std::sort(frontier_nodes.begin(), frontier_nodes.end());

		const CFGNode *frontier_base = nullptr;
		uint32_t count = 0;
		for (auto *n : frontier_nodes)
		{
			if (n == frontier_base)
				count++;
			else
				count = 1;
			frontier_base = n;

			if (count == dominated_exits.size() && n != merge && query_reachability(*n, *merge))
			{
				needs_early_explicit_ladder = true;
				break;
			}
		}
	}
	else
	{
		// If we cannot do a clean merge anyway, then we should try to look for frontier nodes.
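		// Hypothetical shape this is aiming at: each dominated exit has exactly one
		// dominance frontier node that the loop header does not dominate,
		//
		//   exit_a -> frontier_a,  exit_b -> frontier_b
		//
		// in which case the analysis is retargeted at the frontiers, since unrelated
		// branches into those frontiers can be resolved by the same dispatch ladder.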
		auto frontier_nodes = dominated_exits;

		// Skip forward to the dominance frontier. This makes control flow easier to deal with
		// since unrelated branches to the frontiers can also be resolved.
		// This heuristic is admittedly somewhat arbitrary,
		// but it is meant to help on some specific real-world shaders.
		for (auto *&n : frontier_nodes)
		{
			if (n->dominance_frontier.size() == 1 && !node->dominates(n->dominance_frontier.front()))
			{
				n = n->dominance_frontier.front();
			}
			else
			{
				// We don't have a clean frontier, skip this check.
				frontier_nodes.clear();
				break;
			}
		}

		if (!frontier_nodes.empty())
		{
			// If the frontiers are all different, and it's not the merge block, something is afoot.
			// Don't sort by pointer since we care about codegen invariance.
			std::sort(frontier_nodes.begin(), frontier_nodes.end(), [](const CFGNode *a, const CFGNode *b) {
				return a->forward_post_visit_order > b->forward_post_visit_order;
			});

			bool has_dup_frontier = false;
			for (size_t i = 1, n = frontier_nodes.size(); i < n && !has_dup_frontier; i++)
				if (frontier_nodes[i] == frontier_nodes[i - 1] || frontier_nodes[i] == merge)
					has_dup_frontier = true;

			if (has_dup_frontier)
				frontier_nodes.clear();
		}

		if (!frontier_nodes.empty())
		{
			// Make sure that the frontier nodes we found fully dominate all preds of merge,
			// otherwise, the transpose of code will likely break.
			for (auto *pred : merge->pred)
			{
				bool has_dominating = false;
				for (auto *f : frontier_nodes)
				{
					if (f->dominates(pred))
					{
						has_dominating = true;
						break;
					}
				}

				if (!has_dominating)
				{
					frontier_nodes.clear();
					break;
				}
			}
		}

		if (!frontier_nodes.empty())
		{
			needs_early_explicit_ladder = true;

			// First collect the inner break blocks in a neat bow.
			node->pred_back_edge->fake_succ.clear();
			node->pred_back_edge->fake_pred.clear();
			collect_and_dispatch_control_flow(node, merge, dominated_exits, false, false);
			recompute_cfg();

			// Then collect the outer layer.
			dominated_exits = std::move(frontier_nodes);
			common_idom = merge->immediate_dominator;
		}
	}

	if (needs_early_explicit_ladder)
	{
		// Avoids false-positive assertions when trying to rewrite branches.
		// We're going to recompute the CFG after this anyway.
		node->pred_back_edge->fake_succ.clear();
		node->pred_back_edge->fake_pred.clear();
		collect_and_dispatch_control_flow(common_idom, merge, dominated_exits, false, false);
		return true;
	}

	return false;
}

bool CFGStructurizer::find_loops(unsigned pass)
{
	for (auto index = forward_post_visit_order.size(); index; index--)
	{
		// Visit in reverse order so we resolve outer loops first,
		// this lets us detect ladder-breaking loops.
		auto *node = forward_post_visit_order[index - 1];
		if (node->freeze_structured_analysis)
		{
			// If we have a pre-created dummy loop for ladder breaking,
			// just propagate the header information and be done with it.
			if (node->merge == MergeType::Loop)
			{
				node->loop_merge_block->headers.push_back(node);
				continue;
			}
		}

		if (!node->has_pred_back_edges())
			continue;

		// There are back-edges here, this must be a loop header.
		node->merge = MergeType::Loop;

		auto result = analyze_loop(node);
		auto &dominated_exit = result.dominated_exit;
		auto &inner_dominated_exit = result.inner_dominated_exit;
		auto &non_dominated_exit = result.non_dominated_exit;

		// This should not come up here, and must be handled in transpose loops.
		assert(result.dominated_continue_exit.empty());

		// Detect infinite loop with an exit which is only in inner loop construct.
		// It is impossible to construct a merge block in this case,
		// so just merge to unreachable.
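		// Illustrative scenario: a shader-style for(;;) whose continue block has no exit edge
		// and whose only exits are return-like. If every candidate exit allows it, the loop is
		// deliberately classified as infinite rather than given an invented, bogus merge block.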
		bool force_infinite_loop = false;

		// If we have a trivial case where there is only one possible loop exit which we dominate,
		// we shouldn't consider it an infinite loop, but a merge.
		bool trivial_exit_loop = dominated_exit.size() == 1 &&
		                         result.non_dominated_exit.empty() && result.inner_dominated_exit.empty() &&
		                         result.direct_exits.empty() && result.inner_direct_exits.empty();

		if (trivial_exit_loop)
		{
			auto *candidate = dominated_exit.front();

			// Resolve some false positives. It's possible that a loop exit can be detected as inner,
			// but it's just a good merge candidate for an inner infinite loop.
			bool loop_exit_dominates_continue =
			    candidate->immediate_dominator &&
			    candidate->immediate_dominator->dominates(node->pred_back_edge);

			// If we promoted an inner header, this is not a trivial exit.
			const CFGNode *innermost_loop_header = get_innermost_loop_header_for(node, dominated_exit.front());
			if (node != innermost_loop_header)
			{
				// There are at least two scenarios where we have to be careful:
				// - If the innermost header has an edge out of the continue block.
				//   If we still detect this exit as belonging to the inner loop, it must be that case.
				// - Also, only accept this as a trivial exit if the immediate dominator of the exit
				//   also dominates the continue block.
				if (!innermost_loop_header->pred_back_edge->succ.empty() || !loop_exit_dominates_continue)
					trivial_exit_loop = false;
			}
		}

		if (node->pred_back_edge->succ.empty() && !trivial_exit_loop)
		{
			force_infinite_loop = true;
			for (auto *e : result.dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.non_dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.inner_dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.direct_exits)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.inner_direct_exits)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
		}

		if (force_infinite_loop ||
		    (dominated_exit.empty() && inner_dominated_exit.empty() && non_dominated_exit.empty()))
		{
			// There can be zero loop exits, i.e. infinite loop. This means we have no merge block.
			// We will invent a merge block to satisfy SPIR-V validator, and declare it as unreachable.
			node->loop_merge_block = nullptr;
			//LOGI("Loop without merge: %p (%s)\n", static_cast<const void *>(node), node->name.c_str());
		}
		else if (dominated_exit.size() == 1 && non_dominated_exit.empty() && inner_dominated_exit.empty())
		{
			CFGNode *direct_exit_pdom = nullptr;
			if (!result.direct_exits.empty())
				direct_exit_pdom = find_common_post_dominator(result.direct_exits);

			if (direct_exit_pdom && query_reachability(*dominated_exit.front(), *direct_exit_pdom))
			{
				node->loop_ladder_block = dominated_exit.front();
				node->loop_merge_block = direct_exit_pdom;
			}
			else
			{
				// Clean merge.
				// This is a unique merge block. There can be no other merge candidate.
				node->loop_merge_block = dominated_exit.front();
			}

			const_cast<CFGNode *>(node->loop_merge_block)->add_unique_header(node);
			//LOGI("Loop with simple merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(node), node->name.c_str(),
			//     static_cast<const void *>(node->loop_merge_block), node->loop_merge_block->name.c_str());
		}
		else if (dominated_exit.empty() && inner_dominated_exit.empty() && non_dominated_exit.size() == 1)
		{
			// Single-escape merge.
			// It is unique, but we need workarounds later.
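		// Sketch of the rewrite performed below (names illustrative):
		//
		//   before: header -> ... -> escape_exit (not dominated by header)
		//   after:  header -> ... -> escape_exit.merge (ladder) -> escape_exit
		//
		// The ladder block is dominated by the header, so the loop can merge cleanly to it
		// before control breaks out to the non-dominated exit.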
auto *merge_block = non_dominated_exit.front(); // We can make the non-dominated exit dominated by // adding a ladder block in-between. This allows us to merge the loop cleanly // before breaking out. auto *ladder = create_ladder_block(node, merge_block, ".merge"); node->loop_ladder_block = nullptr; node->loop_merge_block = ladder; const_cast(node->loop_merge_block)->add_unique_header(node); //LOGI("Loop with ladder merge: %p (%s) -> %p (%s)\n", static_cast(node), node->name.c_str(), // static_cast(node->loop_merge_block), node->loop_merge_block->name.c_str()); } else { auto merge_result = analyze_loop_merge(node, result); auto *merge = merge_result.merge; auto *dominated_merge = merge_result.dominated_merge; if (pass == 0 && rewrite_complex_loop_exits(node, merge, dominated_exit)) return true; if (!merge) { // Most likely this means we have an early return somewhere. Try the weak merge candidate. merge = merge_result.weak_merge; } if (!merge) { LOGW("Failed to find a common merge point ...\n"); } else if (node->can_loop_merge_to(merge)) { // Clean merge. // This is a unique merge block. There can be no other merge candidate. //LOGI("Loop with simple multi-exit merge: %p (%s) -> %p (%s)\n", static_cast(node), // node->name.c_str(), static_cast(node->loop_merge_block), // node->loop_merge_block->name.c_str()); node->loop_merge_block = merge; const_cast(node->loop_merge_block)->add_unique_header(node); } else { if (!dominated_merge && node->pred_back_edge->succ.size() == 1) { // If continue block exits, and it still does not dominate, we should invent a ladder block // so we get one, otherwise splitting merge scopes will break. dominated_merge = create_ladder_block(node->pred_back_edge, node->pred_back_edge->succ.front(), ".merge"); } // Single-escape merge. // It is unique, but we need workarounds later. //LOGI("Loop with ladder multi-exit merge: %p (%s) -> %p (%s)\n", static_cast(node), // node->name.c_str(), static_cast(node->loop_merge_block), // node->loop_merge_block->name.c_str()); //if (dominated_merge) //{ //LOGI(" Ladder block: %p (%s)\n", static_cast(dominated_merge), // dominated_merge->name.c_str()); //} // We will use this block as a ladder. node->loop_ladder_block = dominated_merge; node->loop_merge_block = merge; const_cast(node->loop_merge_block)->add_unique_header(node); } } } return false; } CFGNode *CFGStructurizer::get_target_break_block_for_inner_header(const CFGNode *node, size_t header_index) { CFGNode *inner_header = node->headers[header_index]; CFGNode *target_header = nullptr; for (size_t j = header_index; j && !target_header; j--) { auto *candidate_header = node->headers[j - 1]; if (candidate_header->merge == MergeType::Loop) { // We might have two loops, each at equal scopes. // In order to break out to an outer loop, we must verify that the loops actually nest. // We must not introduce any backwards branches here. CFGNode *candidate_merge = nullptr; if (candidate_header->loop_ladder_block) candidate_merge = candidate_header->loop_ladder_block; else if (candidate_header->loop_merge_block) candidate_merge = candidate_header->loop_merge_block; if (!candidate_merge) continue; // Check for backwards branch. if (query_reachability(*candidate_merge, *inner_header)) continue; // An outer header is expected to dominate the inner header. Otherwise, they live in // separate scopes, and we should look for a header that is further out. 
if (!candidate_header->dominates(inner_header)) continue; target_header = candidate_header; } } return target_header; } CFGNode *CFGStructurizer::create_ladder_block(CFGNode *header, CFGNode *node, const char *tag) { auto *ladder = pool.create_node(); ladder->name = node->name + tag; ladder->add_branch(node); ladder->ir.terminator.type = Terminator::Type::Branch; ladder->ir.terminator.direct_block = node; ladder->immediate_post_dominator = node; ladder->forward_post_visit_order = node->forward_post_visit_order; ladder->backward_post_visit_order = node->backward_post_visit_order; ladder->dominance_frontier.push_back(node); traverse_dominated_blocks_and_rewrite_branch(header, node, ladder); ladder->recompute_immediate_dominator(); return ladder; } CFGNode *CFGStructurizer::get_or_create_ladder_block(CFGNode *node, size_t header_index) { auto *header = node->headers[header_index]; auto *loop_ladder = header->loop_ladder_block; if (!loop_ladder) { // We don't have a ladder, because the loop merged to an outer scope, so we need to fake a ladder. // If we hit this case, we did not hit the simpler case in find_loops(). auto *ladder = create_ladder_block(header, node, ".merge"); header->loop_ladder_block = ladder; // If this is the second outermost scope, we don't need to deal with ladders. // ladder is a dummy branch straight out to the outer merge point. if (header_index > 1) loop_ladder = header->loop_ladder_block; } return loop_ladder; } CFGNode *CFGStructurizer::build_enclosing_break_target_for_loop_ladder(CFGNode *&node, CFGNode *loop_ladder) { // A loop ladder needs to break out somewhere. If we don't have a candidate // place to break out to, we will need to create one for the outer scope. // This is the purpose of the full_break_target fallback. bool ladder_to_merge_is_trivial = loop_ladder->succ.size() == 1 && loop_ladder->succ.front() == node; if (ladder_to_merge_is_trivial) { auto *succ = loop_ladder->succ.front(); // Chase through dummy ladders until we find something tangible that is actually PHI sensitive. while (succ->ir.phi.empty() && succ->succ.size() == 1) succ = succ->succ.front(); IncomingValue *incoming_from_ladder = nullptr; if (!succ->ir.phi.empty()) { // All PHIs are fundamentally the same w.r.t. input blocks. auto &phi = succ->ir.phi.front(); incoming_from_ladder = phi_incoming_blocks_find_block(phi.incoming, loop_ladder); } CFGNode *retarget_idom = nullptr; if (incoming_from_ladder != nullptr) { // If succ takes this ladder as a PHI input, we have to be careful. // We can only treat this merge as trivial if we can trivially hoist the input to the idom. // Hoisting to idom only works if that idom is not already a PHI input for succ, // and that idom dominates the input value. retarget_idom = loop_ladder->immediate_dominator; bool can_hoist_incoming_value = retarget_idom && retarget_idom != loop_ladder && !phi_incoming_blocks_find_block(succ->ir.phi.front().incoming, retarget_idom); if (!can_hoist_incoming_value) retarget_idom = nullptr; } if (retarget_idom) { bool is_generated = false; // We have no opcodes in loop ladder, but theoretically, // we can have some PHI values that are being depended on. 
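			// Concretely: if succ has a PHI input { block = loop_ladder, id = X } and X is not
			// generated by loop_ladder itself, then X must already be visible in retarget_idom
			// (which dominates loop_ladder), so the incoming block can simply be retargeted.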
for (auto &override_phi : succ->ir.phi) { auto *incoming = phi_incoming_blocks_find_block(override_phi.incoming, loop_ladder); if (!incoming) continue; if (id_is_generated_by_block(loop_ladder, incoming->id)) { is_generated = true; break; } } if (!is_generated) { // If we don't generate the ID ourselves and idom dominates this block we can prove // that idom is a valid incoming value. for (auto &override_phi : succ->ir.phi) retarget_phi_incoming_block(override_phi, loop_ladder, retarget_idom); } else { // It's not a trivial merge after all :( ladder_to_merge_is_trivial = false; } } } CFGNode *full_break_target = nullptr; // We have to break somewhere, turn the outer selection construct into // a loop. if (!ladder_to_merge_is_trivial) { // Selection merge to this dummy instead. auto *new_selection_merge = create_helper_pred_block(node); // This is now our fallback loop break target. full_break_target = node; auto *loop = create_helper_pred_block(new_selection_merge->headers[0]); // Reassign header node. assert(new_selection_merge->headers[0]->merge == MergeType::Selection); new_selection_merge->headers[0]->selection_merge_block = new_selection_merge; new_selection_merge->headers[0] = loop; loop->merge = MergeType::Loop; loop->loop_merge_block = node; loop->freeze_structured_analysis = true; // After the loop ladder, make sure we always branch to the break target. traverse_dominated_blocks_and_rewrite_branch(loop_ladder, new_selection_merge, node); node = new_selection_merge; } return full_break_target; } CFGNode *CFGStructurizer::build_ladder_block_for_escaping_edge_handling(CFGNode *node, CFGNode *header, CFGNode *loop_ladder, CFGNode *target_header, CFGNode *full_break_target, const UnorderedSet &normal_preds) { CFGNode *new_ladder_block = nullptr; if (target_header || full_break_target) { // If we have a ladder block, there exists a merge candidate which the loop header dominates. // We create a ladder block before the merge block, which becomes the true merge block. // In this ladder block, we can detect with Phi nodes whether the break was "clean", // or if we had an escape edge. // If we have an escape edge, we can break to outer level, and continue the ladder that way. // Otherwise we branch to the existing merge block and continue as normal. // We'll also need to rewrite a lot of Phi nodes this way as well. auto *ladder = create_helper_pred_block(loop_ladder); new_ladder_block = ladder; unsigned header_index; for (header_index = 0; header_index < uint32_t(node->headers.size()); header_index++) if (node->headers[header_index] == header) break; assert(header_index != node->headers.size()); // Merge to ladder instead. // If we're fixing up ladders for header index 0 it means we've already rewritten everything, // only apply the last fixup branch. if (header_index != 0 || block_is_plain_continue(node)) { traverse_dominated_blocks_and_rewrite_branch( header, node, ladder, [node, header_index](const CFGNode *next) { for (unsigned i = 0; i < header_index; i++) { auto *target = node->headers[i]; // Do not introduce cycles. Outer scopes must never be rewritten to branch to inner scopes. if (target && target->loop_ladder_block == next) return false; } return true; }, {}); } CFGNode *true_block = nullptr; // Ladder breaks out to outer scope. 
if (target_header && target_header->loop_ladder_block) true_block = target_header->loop_ladder_block; else if (target_header && target_header->loop_merge_block) true_block = target_header->loop_merge_block; else if (full_break_target) true_block = full_break_target; else LOGW("No loop merge block?\n"); if (true_block) { rewrite_ladder_conditional_branch_from_incoming_blocks( ladder, true_block, loop_ladder, [&](const CFGNode *n) { return normal_preds.count(n) == 0; }, String("ladder_phi_") + loop_ladder->name); // This can happen in some scenarios, fixup the branch to be a direct one instead. if (ladder->ir.terminator.true_block == ladder->ir.terminator.false_block) { ladder->ir.terminator.direct_block = ladder->ir.terminator.true_block; ladder->ir.terminator.type = Terminator::Type::Branch; } } } else { // Here, loop_ladder -> final merge is a trivial, direct branch. if (loop_ladder->ir.operations.empty()) { // Simplest common case. // If the loop ladder just branches to outer scope, and this block does not perform // any operations we can avoid messing around with ladder PHI variables and just execute the branch. // This block will likely become a frontier node when merging PHI instead. // This is a common case when breaking out of a simple for loop. traverse_dominated_blocks_and_rewrite_branch(header, node, loop_ladder); } else { // We have a case where we're trivially breaking out of a selection construct, // but the loop ladder block contains operations which we must not execute, // since we were supposed to branch directly out to node. // We cannot directly break out of a selection construct, so our ladder must be a bit more sophisticated. // ladder-pre -> merge -> ladder-post -> selection merge // \-------------------/ auto *ladder_pre = create_helper_pred_block(loop_ladder); auto *ladder_post = create_helper_succ_block(loop_ladder); // Merge to ladder instead. traverse_dominated_blocks_and_rewrite_branch(header, node, ladder_pre); rewrite_ladder_conditional_branch_from_incoming_blocks( ladder_pre, ladder_post, loop_ladder, [&](const CFGNode *n) { return normal_preds.count(n) == 0; }, String("ladder_phi_") + loop_ladder->name); new_ladder_block = ladder_pre; } } return new_ladder_block; } void CFGStructurizer::eliminate_degenerate_switch_merges() { for (auto *node : forward_post_visit_order) { if (node->headers.size() <= 1) continue; // In the second pass, it's illegal to have more than two target headers, so we have to turn some // headers into unreachable. The outermost scope wins. std::sort(node->headers.begin(), node->headers.end(), [](const CFGNode *a, const CFGNode *b) -> bool { if (a->dominates(b)) return true; else if (b->dominates(a)) return false; else return a->forward_post_visit_order > b->forward_post_visit_order; }); // Can only elide if we have a true loop merge to this node. if (node->headers[0]->merge != MergeType::Loop || node->headers[0]->loop_merge_block != node) continue; for (size_t i = 1, n = node->headers.size(); i < n; i++) { auto *header = node->headers[i]; // This cannot possibly work with loops. // We can generally turn selections into unreachable merges without trouble however ... 
if (header->merge == MergeType::Selection && header->selection_merge_block == node) header->selection_merge_block = nullptr; } } } bool CFGStructurizer::is_rewind_candidate_split_node( const Vector &visited_orphans, CFGNode *node, CFGNode *candidate) const { if (node->forward_post_visit_order != candidate->forward_post_visit_order) return false; if (is_trivially_no_split_node(candidate)) return false; if (std::find(visited_orphans.begin(), visited_orphans.end(), candidate) != visited_orphans.end()) return false; // For whatever reason, the node is no longer a meaningful merge target. for (auto *header : candidate->headers) if (!candidate->can_backtrace_to(header)) return false; // If we created a new helper pred block during traversal, it might not // exist in forward_post_visit_order. // Look for the replacement block here to make sure it gets processed in the appropriate order. // The replacement can happen in-line in this function, // so there is no chance to re-traverse the CFG. // Only consider blocks that we trivially post-dominate and that // definitely have no entry in forward_post_visit_order already. if (candidate->succ.size() != 1 || candidate->succ.front() != node) { // This is a ladder block of some sort. It's possible we're already in a "resolved" state, // so we really should not try to split further. // If we're considered a proper ladder block by any of our headers, bail. for (auto *header : candidate->headers) if (header->loop_ladder_block == node || header->loop_merge_block == node) return false; } return true; } bool CFGStructurizer::is_trivially_no_split_node(CFGNode *node) const { if (node->headers.size() <= 1 && !block_is_plain_continue(node)) return true; // It's possible that we have just one header. // One loop has a ladder block which is not this block, but the post-dominator is a pure continue block. // This gets rather awkward, since we need to special case this scenario. if (node->headers.empty()) return true; return false; } bool CFGStructurizer::split_merge_blocks(CFGNode *node) { if (is_trivially_no_split_node(node)) return false; // If this block was the merge target for more than one construct, // we will need to split the block. In SPIR-V, a merge block can only be the merge target for one construct. // However, we can set up a chain of merges where inner scope breaks to outer scope with a dummy basic block. // The outer scope comes before the inner scope merge. // We cannot fully trust a sort on post-visit order, since if we have two split blocks here, // they will have the same post-visit order until we recompute them. // FIXME: Should probably be smarter about this ... std::sort(node->headers.begin(), node->headers.end(), [](const CFGNode *a, const CFGNode *b) -> bool { if (a->dominates(b)) return true; else if (b->dominates(a)) return false; else return a->forward_post_visit_order > b->forward_post_visit_order; }); //LOGI("Splitting merge blocks for %s\n", node->name.c_str()); //for (auto *header : node->headers) // LOGI(" Header: %s.\n", header->name.c_str()); CFGNode *full_break_target = nullptr; // If we're a plain continue block, we're implicitly the full break target. bool plain_continue_resolve = block_is_plain_continue(node); if (plain_continue_resolve) full_break_target = node; // Before we start splitting and rewriting branches, we need to know which preds are considered "normal", // and which branches are considered ladder breaking branches (rewritten branches). 
	// This will influence whether a pred block gets false or true when emitting ladder breaking blocks later.
	Vector<UnorderedSet<const CFGNode *>> normal_preds(node->headers.size());
	for (size_t i = 0; i < node->headers.size(); i++)
		if (node->headers[i]->loop_ladder_block)
			for (auto *pred : node->headers[i]->loop_ladder_block->pred)
				normal_preds[i].insert(pred);

	bool has_rewrites_to_outer_ladder = false;

	// Start from innermost scope, and rewrite all escape branches to a merge block which is dominated by
	// the loop header in question. The merge block for the loop must have a ladder block before the old merge block.
	// This ladder block will break to outer scope, or keep executing the old merge block.
	for (size_t i = node->headers.size() - 1; i || plain_continue_resolve; i--)
	{
		auto *current_node = node->headers[i];

		// Find innermost loop header scope we can break to when resolving ladders.
		CFGNode *target_header = i != 0 ? get_target_break_block_for_inner_header(node, i) : nullptr;

		//LOGI("Current: %s, target: %s.\n", current_node->name.c_str(), target_header->name.c_str());

		if (current_node->merge == MergeType::Loop)
		{
			auto *loop_ladder = get_or_create_ladder_block(node, i);

			// The loop ladder needs to break to somewhere.
			// Either this is an outer loop scope, or we need to create a fake loop we can break out of if
			// the break is non-trivial.
			if (loop_ladder && !target_header && !full_break_target)
				full_break_target = build_enclosing_break_target_for_loop_ladder(node, loop_ladder);

			CFGNode *new_ladder_block = nullptr;
			if (loop_ladder)
			{
				new_ladder_block = build_ladder_block_for_escaping_edge_handling(
				    node, current_node, loop_ladder, target_header, full_break_target, normal_preds[i]);
				if (target_header == node->headers[0])
					has_rewrites_to_outer_ladder = true;
			}

			// We won't analyze this again, so make sure header knows
			// about the new merge block.
			if (current_node->freeze_structured_analysis)
			{
				if (new_ladder_block)
					current_node->loop_ladder_block = new_ladder_block;
				current_node->loop_merge_block = current_node->loop_ladder_block;
				current_node->loop_ladder_block = nullptr;
			}
		}
		else if (current_node->merge == MergeType::Selection)
		{
			if (target_header)
			{
				// Breaks out to outer available scope.
				CFGNode *rewrite_to = nullptr;
				if (target_header->loop_ladder_block)
					rewrite_to = target_header->loop_ladder_block;
				else if (target_header->loop_merge_block)
					rewrite_to = target_header->loop_merge_block;

				if (rewrite_to)
				{
					traverse_dominated_blocks_and_rewrite_branch(current_node, node, rewrite_to);
					if (target_header == node->headers[0])
						has_rewrites_to_outer_ladder = true;
				}
				else
					LOGW("No loop merge block?\n");
			}
			else if (full_break_target)
			{
				traverse_dominated_blocks_and_rewrite_branch(current_node, node, full_break_target);
			}
			else
			{
				// The outer scope *must* now become a loop, no matter what.
				// We cannot rely on a traversal to rewrite breaking constructs in the entire loop,
				// so "everything" must essentially become a break instead.
full_break_target = node; assert(node->headers[0]->merge == MergeType::Selection); node->headers[0]->merge = MergeType::Loop; node->headers[0]->freeze_structured_analysis = true; assert(node->headers[0]->selection_merge_block == node); node->headers[0]->loop_merge_block = node->headers[0]->selection_merge_block; node->headers[0]->selection_merge_block = nullptr; } } else LOGE("Invalid merge type.\n"); if (i == 0) break; } auto *outer_header = node->headers[0]; if (has_rewrites_to_outer_ladder && outer_header->merge == MergeType::Loop && outer_header->loop_ladder_block && outer_header->loop_merge_block && outer_header->loop_ladder_block->dominates(outer_header->loop_merge_block)) { auto *ladder = outer_header->loop_ladder_block; bool non_trivial_ladder = !ladder->ir.operations.empty() || ladder_chain_has_phi_dependencies(ladder, outer_header->loop_merge_block); if (non_trivial_ladder) { // It's possible we have branches that intended to rewrite to loop_merge_block // but ended up writing to loop_ladder_block instead. // Perform a final fixup branch if this is necessary. // If the ladder block is a dummy, we can ignore this. build_ladder_block_for_escaping_edge_handling(node, outer_header, outer_header->loop_ladder_block, nullptr, outer_header->loop_merge_block, normal_preds[0]); } } return true; } void CFGStructurizer::split_merge_blocks_and_visit_orphan_preds( Vector &visited_orphans, CFGNode *merge, CFGNode *node) { if (split_merge_blocks(node)) return; for (auto *pred : node->pred) { if (is_rewind_candidate_split_node(visited_orphans, merge, pred)) { visited_orphans.push_back(pred); split_merge_blocks_and_visit_orphan_preds(visited_orphans, merge, pred); } } } void CFGStructurizer::split_merge_blocks() { Vector visited_orphans; for (auto *node : forward_post_visit_order) split_merge_blocks_and_visit_orphan_preds(visited_orphans, node, node); } bool CFGStructurizer::structurize(unsigned pass) { auto switch_mode = process_switch_blocks(pass); while (switch_mode == SwitchProgressMode::IterativeModify) { // For complex rewrites, we damage the CFG, so need to start over every iteration. recompute_cfg(); switch_mode = process_switch_blocks(pass); } // After a trivial modify, we must be able to complete the process in one iteration. if (switch_mode == SwitchProgressMode::SimpleModify) { recompute_cfg(); if (process_switch_blocks(pass) != SwitchProgressMode::Done) { LOGE("Fatal, detected infinite loop.\n"); abort(); } } if (find_loops(pass)) return true; find_selection_merges(pass); fixup_broken_selection_merges(pass); if (pass == 0) split_merge_blocks(); else eliminate_degenerate_switch_merges(); return false; } bool CFGStructurizer::exists_path_in_cfg_without_intermediate_node(const CFGNode *start_block, const CFGNode *end_block, const CFGNode *stop_block) const { // If we're resolving PHI for a frontier inside a loop, consider the back-edge as the end target for analysis. // If we start outside the loop, don't move the end block. if (end_block->pred_back_edge && !query_reachability(*stop_block, *end_block) && !query_reachability(*start_block, *end_block)) { end_block = end_block->pred_back_edge; } if (query_reachability(*start_block, *end_block) && query_reachability(*start_block, *stop_block) && query_reachability(*stop_block, *end_block)) { auto *frontier = get_post_dominance_frontier_with_cfg_subset_that_reaches(stop_block, end_block, start_block); // We already know start_block reaches the frontier. 
		return frontier != nullptr;
	}
	else
	{
		bool ret = query_reachability_through_back_edges(*start_block, *end_block);
		return ret;
	}
}

CFGNode *CFGStructurizer::get_post_dominance_frontier_with_cfg_subset_that_reaches(const CFGNode *node,
                                                                                   const CFGNode *must_reach,
                                                                                   const CFGNode *must_reach_frontier) const
{
	UnorderedSet<const CFGNode *> promoted_post_dominators;
	promoted_post_dominators.insert(node);

	auto frontiers = node->post_dominance_frontier;
	assert(query_reachability(*node, *must_reach));

	if (frontiers.empty())
		return nullptr;

	while (!frontiers.empty())
	{
		// We might not be interested in post-domination-frontiers that we cannot reach.
		// Filter our search based on this.
		if (must_reach_frontier)
		{
			auto itr = std::remove_if(frontiers.begin(), frontiers.end(), [&](CFGNode *candidate) {
				return !query_reachability(*must_reach_frontier, *candidate);
			});
			frontiers.erase(itr, frontiers.end());
		}

		if (frontiers.size() > 1)
		{
			std::sort(frontiers.begin(), frontiers.end(), [](const CFGNode *a, const CFGNode *b) {
				return a->backward_post_visit_order < b->backward_post_visit_order;
			});
			frontiers.erase(std::unique(frontiers.begin(), frontiers.end()), frontiers.end());
		}
		else if (frontiers.empty())
			break;

		auto *frontier = frontiers.back();

		// For a frontier to be discounted, we look at all its successors and check that each one
		// either cannot reach must_reach, or is post-dominated by a node in promoted_post_dominators.
		// If a post-dominance frontier satisfies this rule, it is promoted to be considered an alias of node.
		bool all_succs_must_go_via_node = true;
		for (auto *succ : frontier->succ)
		{
			bool promote = true;
			if (query_reachability(*succ, *must_reach))
			{
				promote = false;
				for (auto *pdom : promoted_post_dominators)
				{
					if (pdom->post_dominates(succ))
					{
						promote = true;
						break;
					}
				}
			}

			if (!promote)
			{
				all_succs_must_go_via_node = false;
				break;
			}
		}

		if (!all_succs_must_go_via_node)
		{
			return frontier;
		}
		else
		{
			promoted_post_dominators.insert(frontier);
			frontiers.pop_back();
			for (auto *pdoms : frontier->post_dominance_frontier)
				frontiers.push_back(pdoms);
		}
	}

	return frontiers.empty() ?
nullptr : frontiers.front(); } void CFGStructurizer::recompute_post_dominance_frontier(CFGNode *node) { for (auto *pred : node->pred) { if (pred->immediate_post_dominator != node && std::find(node->post_dominance_frontier.begin(), node->post_dominance_frontier.end(), pred) == node->post_dominance_frontier.end()) { node->post_dominance_frontier.push_back(pred); } if (auto *ipdom = node->immediate_post_dominator) { for (auto *frontier_node : node->post_dominance_frontier) { if (!ipdom->post_dominates(frontier_node) && std::find(ipdom->post_dominance_frontier.begin(), ipdom->post_dominance_frontier.end(), frontier_node) == ipdom->post_dominance_frontier.end()) { ipdom->post_dominance_frontier.push_back(frontier_node); } } } } } void CFGStructurizer::recompute_dominance_frontier(CFGNode *node) { for (auto *succ : node->succ) { if (succ->immediate_dominator != node && std::find(node->dominance_frontier.begin(), node->dominance_frontier.end(), succ) == node->dominance_frontier.end()) { node->dominance_frontier.push_back(succ); } if (auto *idom = node->immediate_dominator) { for (auto *frontier_node : node->dominance_frontier) { if (!idom->dominates(frontier_node) && std::find(idom->dominance_frontier.begin(), idom->dominance_frontier.end(), frontier_node) == idom->dominance_frontier.end()) { idom->dominance_frontier.push_back(frontier_node); } } } } } bool CFGStructurizer::rewrite_invalid_loop_breaks() { // Keep iterating here until we have validated a clean CFG w.r.t. block-like loops. // This should pass through first time without issue with extremely high probability, // so hitting the slow path isn't a real concern until proven otherwise. CFGNode *rewrite_header = nullptr; CFGNode *invalid_target = nullptr; CFGNode *invalid_merge = nullptr; // Process from inside out. for (auto *node : forward_post_visit_order) { // Structured loop constructs can end up with problematic merge scenarios where we missed // some cases where blocks branch outside our construct. // At some point, we were considered mere selection constructs and breaking out of it is fine, // but if the selection is promoted to a loop at some point after this analysis, we are a bit screwed. // This can happen in complex ladder resolve scenarios. // The fix-up means introducing multiple levels of ladder blocks. if (node->merge == MergeType::Loop && node->freeze_structured_analysis) { auto *merge = node->loop_merge_block; if (!merge || merge->post_dominates(node)) continue; node->traverse_dominated_blocks([&](CFGNode *candidate) { if (candidate == merge || invalid_target) return false; // If the succ can reach outside the loop construct, we have an error condition. for (auto *succ : candidate->succ) { bool can_reach_merge = query_reachability(*succ, *merge); auto *candidate_continue = scan_plain_continue_block(succ); // Need to be a bit more careful about continue blocks in infinite loops. // Include loop exits as well in the reachability analysis. if (!can_reach_merge && candidate_continue->succ_back_edge) { for (auto *fake_succ : candidate_continue->fake_succ) { if (query_reachability(*fake_succ, *merge)) { can_reach_merge = true; break; } } } if (!can_reach_merge) { // Determine if we're an inner terminate/return, or a loop exit. // If the common post-dominator is EXIT node, this is a return-like relationship, // and we skip any fixup. 
auto *pdom = CFGNode::find_common_post_dominator(succ, merge); if (pdom != nullptr && !pdom->pred.empty()) invalid_target = succ; } } return true; }); if (invalid_target) { rewrite_header = node; break; } } else if (node->merge == MergeType::Loop && node->loop_merge_block && node->pred_back_edge && node->pred_back_edge->succ.empty()) { // Only consider "infinite" loops here. Otherwise, the break from continue will always be // a suitable merge target and the ladder block for any loop exits. if (!node->dominates(node->loop_merge_block)) { // We must dominate the loop merge block here. // There is a risk that with breaks happening into multiple scopes in certain cases, // we won't be able to guarantee this in the two-phase structurizer. invalid_merge = node; break; } } } if (invalid_merge) { auto result = analyze_loop(invalid_merge); result.dominated_exit.insert(result.dominated_exit.end(), result.non_dominated_exit.begin(), result.non_dominated_exit.end()); collect_and_dispatch_control_flow(invalid_merge, invalid_merge->loop_merge_block, result.dominated_exit, false, false); recompute_cfg(); return true; } if (invalid_target) { auto *merge = rewrite_header->loop_merge_block; auto *dispatcher = create_helper_pred_block(merge); rewrite_header->loop_merge_block = dispatcher; size_t natural_preds = dispatcher->pred.size(); traverse_dominated_blocks_and_rewrite_branch(rewrite_header, invalid_target, dispatcher); PHI phi; phi.id = module.allocate_id(); phi.type_id = module.get_builder().makeBoolType(); module.get_builder().addName(phi.id, (String("break_selector_") + merge->name).c_str()); for (size_t i = 0; i < natural_preds; i++) { IncomingValue incoming = {}; incoming.block = dispatcher->pred[i]; incoming.id = module.get_builder().makeBoolConstant(true); phi.incoming.push_back(incoming); } for (size_t i = natural_preds, n = dispatcher->pred.size(); i < n; i++) { IncomingValue incoming = {}; incoming.block = dispatcher->pred[i]; incoming.id = module.get_builder().makeBoolConstant(false); phi.incoming.push_back(incoming); } dispatcher->ir.terminator.type = Terminator::Type::Condition; dispatcher->ir.terminator.true_block = merge; dispatcher->ir.terminator.false_block = invalid_target; dispatcher->ir.terminator.direct_block = nullptr; dispatcher->ir.terminator.conditional_id = phi.id; dispatcher->ir.phi.push_back(std::move(phi)); dispatcher->add_branch(invalid_target); recompute_cfg(); return true; } return false; } void CFGStructurizer::traverse(BlockEmissionInterface &iface) { // Make sure all blocks are known to the backend before we emit code. // Prefer that IDs grow the further down the function we go. for (auto itr = forward_post_visit_order.rbegin(); itr != forward_post_visit_order.rend(); ++itr) { (*itr)->id = 0; iface.register_block(*itr); } // Need to emit blocks such that dominating blocks come before dominated blocks. 
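	// Walking forward_post_visit_order from the back is a reverse post-order traversal,
	// which guarantees that a block's immediate dominator is emitted before the block itself.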
	for (auto index = forward_post_visit_order.size(); index; index--)
	{
		auto *block = forward_post_visit_order[index - 1];
		auto &merge = block->ir.merge_info;

		switch (block->merge)
		{
		case MergeType::Selection:
			merge.merge_block = block->selection_merge_block;
			if (merge.merge_block)
				iface.register_block(merge.merge_block);
			merge.merge_type = block->merge;
			iface.emit_basic_block(block);
			break;

		case MergeType::Loop:
			merge.merge_block = block->loop_merge_block;
			merge.merge_type = block->merge;
			merge.continue_block = block->pred_back_edge;
			if (merge.merge_block)
				iface.register_block(merge.merge_block);
			if (merge.continue_block)
				iface.register_block(merge.continue_block);
			iface.emit_basic_block(block);
			break;

		default:
			iface.emit_basic_block(block);
			break;
		}
	}
}

template <typename Op>
void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(const CFGNode *dominator, CFGNode *candidate,
                                                                   CFGNode *from, CFGNode *to, const Op &op,
                                                                   const Vector<CFGNode *> &barrier,
                                                                   UnorderedSet<CFGNode *> &visitation_cache)
{
	visitation_cache.insert(candidate);

	for (auto *node : candidate->succ)
	{
		if (!op(node))
			continue;

		if (node == from)
		{
			// Don't introduce a cycle.
			// We only retarget branches when we have "escape-like" edges.
			bool introduces_cycle;
			if ((to->forward_post_visit_order == candidate->forward_post_visit_order && to != candidate) ||
			    (from->forward_post_visit_order == candidate->forward_post_visit_order && from != candidate))
			{
				// Can happen when resolving ladders. We cannot use reachability query, do it the slow way.
				introduces_cycle = candidate->can_backtrace_to(to);
			}
			else
			{
				introduces_cycle = query_reachability(*to, *candidate);
			}

			if (!introduces_cycle)
			{
				// If we already have a branch to "to", need to branch there via an intermediate node.
				// This way, we can distinguish between a normal branch and a rewritten branch.
				candidate->retarget_branch_with_intermediate_node(from, to);
			}
		}
		else if (dominator->dominates(node) && node != to &&
		         std::find(barrier.begin(), barrier.end(), node) == barrier.end())
		{
			// Do not traverse beyond the new branch target.
			if (!visitation_cache.count(node))
				traverse_dominated_blocks_and_rewrite_branch(dominator, node, from, to, op, barrier, visitation_cache);
		}
	}

	// In case we are rewriting branches to a new merge block, we might
	// change the immediate post dominator for continue blocks inside this loop construct.
	// When analysing post dominance in these cases, we need to make sure that we merge to the new merge block,
	// and not the old one. This avoids some redundant awkward loop constructs.
	for (auto &fake_next : candidate->fake_succ)
	{
		if (fake_next == from)
		{
			candidate->retarget_fake_succ(from, to);
			break;
		}
	}
}

template <typename Op>
void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to,
                                                                   const Op &op, const Vector<CFGNode *> &barrier)
{
	if (from == to)
		return;

	UnorderedSet<CFGNode *> visitation_cache;
	traverse_dominated_blocks_and_rewrite_branch(dominator, dominator, from, to, op, barrier, visitation_cache);
	dominator->fixup_merge_info_after_branch_rewrite(from, to);

	// Force all post-domination information to be recomputed.
	Vector<CFGNode *> linear_visitation_cache;
	linear_visitation_cache.reserve(visitation_cache.size());
	for (auto *n : visitation_cache)
	{
		if (n->immediate_post_dominator == from)
		{
			if (n->fake_succ.empty())
			{
				n->immediate_post_dominator = nullptr;
				// Ignore any infinite continue blocks.
				// They wreak havoc in post-dominance analysis.
				linear_visitation_cache.push_back(n);
			}
			else
			{
				// Infinite loop blocks must not be traversed again.
n->immediate_post_dominator = to; } } } // Will recompute everything that was cleared out. // Compute later nodes first. This way we avoid a potential recursive loop. std::sort(linear_visitation_cache.begin(), linear_visitation_cache.end(), [](const CFGNode *a, const CFGNode *b) { return a->forward_post_visit_order < b->forward_post_visit_order; }); for (auto *n : linear_visitation_cache) if (!n->immediate_post_dominator) n->recompute_immediate_post_dominator(); dominator->recompute_immediate_post_dominator(); } void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to) { traverse_dominated_blocks_and_rewrite_branch(dominator, from, to, [](const CFGNode *node) -> bool { return true; }, {}); } } // namespace dxil_spv ================================================ FILE: cfg_structurizer.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #include "thread_local_allocator.hpp" #include "ir.hpp" #include #include namespace dxil_spv { class BlockEmissionInterface; class SPIRVModule; struct CFGNode; class CFGNodePool; class BlockEmissionInterface { public: virtual ~BlockEmissionInterface() = default; virtual void emit_basic_block(CFGNode *node) = 0; virtual void register_block(CFGNode *node) = 0; }; class CFGStructurizer { public: CFGStructurizer(CFGNode *entry, CFGNodePool &pool, SPIRVModule &module); bool run(); bool run_trivial(); void traverse(BlockEmissionInterface &iface); CFGNode *get_entry_block() const; bool rewrite_rov_lock_region(); void rewrite_auto_group_shared_barrier(); void flatten_subgroup_shuffles(); void fixup_loop_header_undef_phis(); private: CFGNode *entry_block; CFGNode *exit_block; CFGNodePool &pool; SPIRVModule &module; // For dominance analysis. Vector forward_post_visit_order; // For post-dominance analysis. 
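The traverse() contract above is simple: register_block() is called for every block (including merge and continue blocks) before any emit_basic_block() call, and blocks are emitted with dominating blocks first. As a minimal sketch (not part of the repository, assuming only the CFGNode members used above, id and name), a backend could look like this:

#include "cfg_structurizer.hpp"
#include "node.hpp"
#include <cstdio>

namespace dxil_spv
{
// Hypothetical backend: numbers blocks on registration, prints them on emission.
struct PrintingBackend final : BlockEmissionInterface
{
    void register_block(CFGNode *node) override
    {
        // traverse() resets each id to 0 before registering, so hand out fresh IDs once.
        if (node->id == 0)
            node->id = next_id++;
    }

    void emit_basic_block(CFGNode *node) override
    {
        // Invoked such that dominating blocks always come before dominated blocks.
        printf("block %u (%s)\n", unsigned(node->id), node->name.c_str());
    }

    unsigned next_id = 1;
};
} // namespace dxil_spv

Passing such a backend to CFGStructurizer::traverse() mirrors, in spirit, what SPIRVModule does when it emits the real SPIR-V function body.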
================================================
FILE: cfg_structurizer.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include "ir.hpp"
#include <functional>
#include <stddef.h>

namespace dxil_spv
{
class BlockEmissionInterface;
class SPIRVModule;
struct CFGNode;
class CFGNodePool;

class BlockEmissionInterface
{
public:
    virtual ~BlockEmissionInterface() = default;
    virtual void emit_basic_block(CFGNode *node) = 0;
    virtual void register_block(CFGNode *node) = 0;
};

class CFGStructurizer
{
public:
    CFGStructurizer(CFGNode *entry, CFGNodePool &pool, SPIRVModule &module);
    bool run();
    bool run_trivial();
    void traverse(BlockEmissionInterface &iface);
    CFGNode *get_entry_block() const;
    bool rewrite_rov_lock_region();
    void rewrite_auto_group_shared_barrier();
    void flatten_subgroup_shuffles();
    void fixup_loop_header_undef_phis();

private:
    CFGNode *entry_block;
    CFGNode *exit_block;
    CFGNodePool &pool;
    SPIRVModule &module;

    // For dominance analysis.
    Vector<CFGNode *> forward_post_visit_order;
    // For post-dominance analysis.
    Vector<CFGNode *> backward_post_visit_order;
    Vector<uint32_t> reachability_bitset;
    unsigned reachability_stride = 0;
    UnorderedSet<const CFGNode *> reachable_nodes;
    UnorderedSet<const CFGNode *> structured_loop_merge_targets;

    void visit(CFGNode &entry);
    void visit_for_back_edge_analysis(CFGNode &entry);
    void backwards_visit();
    void backwards_visit(CFGNode &entry);
    void build_immediate_dominators();
    void build_immediate_post_dominators();
    void build_reachability();
    void visit_reachability(const CFGNode &node);
    bool query_reachability(const CFGNode &from, const CFGNode &to) const;
    bool structurize(unsigned pass);
    bool find_loops(unsigned pass);
    bool rewrite_complex_loop_exits(CFGNode *node, CFGNode *merge, Vector<CFGNode *> &dominated_exits);
    bool rewrite_transposed_loops();

    struct LoopAnalysis
    {
        Vector<CFGNode *> direct_exits;
        Vector<CFGNode *> inner_direct_exits;
        Vector<CFGNode *> dominated_exit;
        Vector<CFGNode *> inner_dominated_exit;
        Vector<CFGNode *> non_dominated_exit;
        Vector<CFGNode *> dominated_continue_exit;
    };
    LoopAnalysis analyze_loop(CFGNode *node) const;

    struct LoopMergeAnalysis
    {
        CFGNode *merge;
        CFGNode *weak_merge;
        CFGNode *dominated_merge;
        CFGNode *infinite_continue_ladder;
    };
    LoopMergeAnalysis analyze_loop_merge(CFGNode *node, const LoopAnalysis &analysis);

    void rewrite_transposed_loop_inner(CFGNode *node, CFGNode *impossible_merge_target,
                                       const LoopMergeAnalysis &analysis);
    void rewrite_transposed_loop_outer(CFGNode *node, CFGNode *impossible_merge_target,
                                       const LoopMergeAnalysis &analysis);
    static bool is_strictly_dominance_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c);
    bool is_reachability_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c);
    bool serialize_interleaved_merge_scopes_aggressive();
    bool serialize_interleaved_merge_scopes();
    bool serialize_interleaved_early_returns();
    static Vector> build_pdf_ranges(const Vector &candidates);
    static bool pdf_ranges_have_strict_dominance_ordering(const Vector> &candidates);
    void filter_serialization_candidates(Vector<CFGNode *> &candidates) const;
    void split_merge_scopes();
    bool is_rewind_candidate_split_node(const Vector<CFGNode *> &visited_orphans, CFGNode *node,
                                        CFGNode *candidate) const;
    bool is_trivially_no_split_node(CFGNode *node) const;
    void eliminate_degenerate_blocks();
    static bool ladder_chain_has_phi_dependencies(const CFGNode *chain, const CFGNode *incoming);
    void duplicate_impossible_merge_constructs();
    void duplicate_node(CFGNode *node);
    static bool can_duplicate_phis(const CFGNode *node);
    Operation *duplicate_op(Operation *op, UnorderedMap<spv::Id, spv::Id> &id_remap);
    void update_structured_loop_merge_targets();
    void find_selection_merges(unsigned pass);
    bool header_and_merge_block_have_entry_exit_relationship(const CFGNode *header, const CFGNode *merge) const;
    void fixup_broken_selection_merges(unsigned pass);
    bool selection_requires_structured_header(const CFGNode *node) const;

    enum class SwitchProgressMode
    {
        Done,
        SimpleModify,
        IterativeModify
    };
    SwitchProgressMode process_switch_blocks(unsigned pass);
    void hoist_switch_branches_to_frontier(CFGNode *node, CFGNode *merge, CFGNode *frontier);
    Operation *build_switch_case_equal_check(const CFGNode *header, CFGNode *insert_node,
                                             const Terminator::Case &case_label);
    CFGNode *create_switch_merge_ladder(CFGNode *header, CFGNode *merge);
    CFGNode *find_natural_switch_merge_block(CFGNode *node, CFGNode *post_dominator) const;
    const CFGNode *get_innermost_loop_header_for(const CFGNode *node) const;
    const CFGNode *get_innermost_loop_header_for(const CFGNode *header, const CFGNode *node) const;
    bool loop_exit_supports_infinite_loop(const CFGNode *header, const CFGNode *loop_exit) const;
    void split_merge_blocks();
    bool split_merge_blocks(CFGNode *node);
    void split_merge_blocks_and_visit_orphan_preds(Vector<CFGNode *> &visited, CFGNode *merge, CFGNode *node);
    void eliminate_degenerate_switch_merges();
    bool merge_candidate_is_on_breaking_path(const CFGNode *node) const;
    bool merge_candidate_is_inside_continue_construct(const CFGNode *node) const;
    bool continue_block_can_merge(CFGNode *node) const;
    static bool block_is_plain_continue(const CFGNode *node);
    static const CFGNode *scan_plain_continue_block(const CFGNode *node);

    // Create a new block. Rewrite all branches to node from blocks that are dominated by header to that block.
    // The new block then branches to node.
    CFGNode *create_ladder_block(CFGNode *header, CFGNode *node, const char *tag);
    CFGNode *get_target_break_block_for_inner_header(const CFGNode *node, size_t header_index);
    CFGNode *get_or_create_ladder_block(CFGNode *node, size_t header_index);
    CFGNode *build_enclosing_break_target_for_loop_ladder(CFGNode *&node, CFGNode *loop_ladder);
    CFGNode *build_ladder_block_for_escaping_edge_handling(CFGNode *node, CFGNode *header, CFGNode *loop_ladder,
                                                           CFGNode *target_header, CFGNode *full_break_target,
                                                           const UnorderedSet<const CFGNode *> &normal_preds);

    static CFGNode *find_common_post_dominator(const Vector<CFGNode *> &candidates);
    static CFGNode *find_common_post_dominator_with_ignored_break(Vector<CFGNode *> candidates,
                                                                  const CFGNode *break_node);
    CFGNode *find_break_target_for_selection_construct(CFGNode *idom, CFGNode *merge);

    bool control_flow_is_escaping(const CFGNode *node, const CFGNode *merge) const;
    bool control_flow_is_escaping_from_loop(const CFGNode *node, const CFGNode *merge) const;
    bool block_is_load_bearing(const CFGNode *node, const CFGNode *merge) const;
    static Vector<CFGNode *> isolate_structured_sorted(const CFGNode *header, const CFGNode *merge);
    static void isolate_structured(UnorderedSet<CFGNode *> &nodes, const CFGNode *header, const CFGNode *merge);
    static Vector<IncomingValue>::const_iterator find_incoming_value(const CFGNode *frontier_pred,
                                                                     const Vector<IncomingValue> &incoming);

    void rewrite_selection_breaks(CFGNode *header, CFGNode *ladder_to);

    enum class LoopExitType
    {
        Exit,
        Merge,
        Escape,
        MergeToInfiniteLoop,
        InnerLoopExit,
        InnerLoopMerge,
        InnerLoopFalsePositive
    };
    LoopExitType get_loop_exit_type(const CFGNode &header, const CFGNode &node) const;

    CFGNode *create_helper_pred_block(CFGNode *node);
    CFGNode *create_helper_succ_block(CFGNode *node);
    void reset_traversal();
    bool rewrite_invalid_loop_breaks();
    void recompute_cfg();
    void rewrite_multiple_back_edges();
    bool rewrite_impossible_back_edges();
    void compute_dominance_frontier();
    void compute_post_dominance_frontier();
    void create_continue_block_ladders();
    static void recompute_dominance_frontier(CFGNode *node);
    static void recompute_post_dominance_frontier(CFGNode *node);
    static void merge_to_succ(CFGNode *node, unsigned index);
    void retarget_pred_from(CFGNode *new_node, CFGNode *old_succ);
    void retarget_succ_from(CFGNode *new_node, CFGNode *old_pred);

    CFGNode *get_post_dominance_frontier_with_cfg_subset_that_reaches(const CFGNode *node, const CFGNode *must_reach,
                                                                      const CFGNode *must_reach_frontier) const;
    bool exists_path_in_cfg_without_intermediate_node(const CFGNode *start_block, const CFGNode *end_block,
                                                      const CFGNode *stop_block) const;

    struct PHINode
    {
        CFGNode *block;
        unsigned phi_index;
    };
    Vector<PHINode> phi_nodes;
    void insert_phi();
    void insert_phi(PHINode &node);
    void fixup_phi(PHINode &node);
    void cleanup_breaking_phi_constructs();
    bool block_is_breaking_phi_construct(const CFGNode *node) const;
    bool cleanup_breaking_return_constructs();
    void eliminate_node_link_preds_to_succ(CFGNode *node);
    void prune_dead_preds();
    void fixup_broken_value_dominance();
    UnorderedMap<spv::Id, CFGNode *> value_id_to_block;
    void log_cfg(const char *tag) const;
    void log_cfg_graphviz(const char *path) const;
    static bool can_complete_phi_insertion(const PHI &phi, const CFGNode *end_node);
    bool query_reachability_through_back_edges(const CFGNode &from, const CFGNode &to) const;
    bool query_reachability_split_loop_header(const CFGNode &from, const CFGNode &to, const CFGNode &end_node) const;
    bool phi_frontier_makes_forward_progress(const PHI &phi, const CFGNode *frontier, const CFGNode *end_node) const;

    void traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to);
    template <typename Op>
    void traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to, const Op &op,
                                                      const Vector<const CFGNode *> &barrier);
    template <typename Op>
    void traverse_dominated_blocks_and_rewrite_branch(const CFGNode *dominator, CFGNode *candidate, CFGNode *from,
                                                      CFGNode *to, const Op &op,
                                                      const Vector<const CFGNode *> &barrier,
                                                      UnorderedSet<CFGNode *> &visitation_cache);
    CFGNode *transpose_code_path_through_ladder_block(CFGNode *header, CFGNode *merge, CFGNode *succ);
    void rewrite_ladder_conditional_branch_from_incoming_blocks(
        CFGNode *ladder, CFGNode *true_block, CFGNode *false_block,
        const std::function &path_cb, const String &name);
    void propagate_branch_control_hints();
    void remove_unused_ssa();

    bool find_single_entry_exit_lock_region(CFGNode *&idom, CFGNode *&pdom, const Vector<CFGNode *> &rov_blocks);
    bool execution_path_is_single_entry_and_dominates_exit(CFGNode *idom, CFGNode *pdom);
    void collect_and_dispatch_control_flow(CFGNode *common_idom, CFGNode *common_pdom,
                                           const Vector<CFGNode *> &constructs,
                                           bool collect_all_code_paths_to_pdom, bool allow_crossing_branches);
    void collect_and_dispatch_control_flow_from_anchor(CFGNode *anchor, const Vector<CFGNode *> &constructs);
    void sink_ssa_constructs();
    void sink_ssa_constructs_run(bool dry_run);
};
} // namespace dxil_spv



================================================
FILE: checkout_dxc.sh
================================================
#!/bin/bash

DXC_REV=a9d33d3500d37bd24c10288c76aca8e1c948d4a2

if [ -d external/DirectXShaderCompiler ]; then
    echo "Updating DirectXShaderCompiler to revision $DXC_REV."
    cd external/DirectXShaderCompiler
    git fetch origin
    git checkout $DXC_REV
    git submodule update --init
else
    echo "Cloning DirectXShaderCompiler revision $DXC_REV."
    mkdir -p external
    cd external
    git clone https://github.com/Microsoft/DirectXShaderCompiler.git
    cd DirectXShaderCompiler
    git checkout $DXC_REV
    git submodule update --init
fi



================================================
FILE: checkout_llvm.sh
================================================
#!/bin/bash

LLVM_REV=2c4ca6832fa6b306ee6a

if [ -z "$PROTOCOL" ]; then
    PROTOCOL=git
fi

echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0."

if [ -d external/llvm ]; then
    echo "Updating LLVM to revision $LLVM_REV."
    cd external/llvm
    git fetch origin
    git checkout $LLVM_REV
else
    echo "Cloning LLVM revision $LLVM_REV."
    mkdir -p external
    cd external
    git clone $PROTOCOL://github.com/llvm-mirror/llvm.git
    cd llvm
    git checkout $LLVM_REV
fi


================================================
FILE: copy_reference_shaders.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
#
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import sys
import os
import argparse
import shutil
import hashlib

def hashstr(path):
    with open(path, 'rb') as f:
        bytes = f.read()
    if len(bytes) < 4:
        print('Skipping file', path, 'due to size < 4.')
        return None
    if bytes[0:4] != b'DXBC':
        print('Skipping broken file', path)
        return None
    result = hashlib.sha1(bytes).hexdigest()
    return result

def add_tags(path, noglsl):
    if not noglsl:
        return path
    else:
        return path[:-4] + 'noglsl.dxil'

def copy_reference_shader(output_dir, input_path, raw, noglsl):
    modified_input_path = add_tags(input_path, noglsl)
    if raw:
        shutil.copy(input_path, os.path.join(output_dir, os.path.basename(modified_input_path)))
    else:
        name = hashstr(input_path)
        if name is not None:
            shutil.copy(input_path, os.path.join(output_dir, name + ('.noglsl' if noglsl else '') + '.dxil'))

def main():
    parser = argparse.ArgumentParser(description = 'Script for copying VKD3D shader dumps to regression suite.')
    parser.add_argument('--dxil', help = 'Folder containing a bunch of .dxil shaders.')
    parser.add_argument('--dxbc', help = 'Folder containing a bunch of .dxbc shaders.')
    parser.add_argument('--output', required = True, help = 'Output directory.')
    parser.add_argument('--raw', help = 'Skip hashing. Files must be in format $hash.dxil', action = 'store_true')
    parser.add_argument('--noglsl', help = 'Add .noglsl. tag.', action = 'store_true')
    args = parser.parse_args()

    if args.dxil is not None:
        for root, dirs, files in os.walk(args.dxil):
            for file in files:
                ext = os.path.splitext(file)[1]
                if ext == '.dxil':
                    print('Copying DXIL reference file:', file)
                    copy_reference_shader(args.output, os.path.join(root, file), args.raw, args.noglsl)

    if args.dxbc is not None:
        for root, dirs, files in os.walk(args.dxbc):
            for file in files:
                ext = os.path.splitext(file)[1]
                if ext == '.dxbc':
                    print('Copying DXBC reference file:', file)
                    copy_reference_shader(args.output, os.path.join(root, file), args.raw, args.noglsl)

if __name__ == '__main__':
    main()



================================================
FILE: debug/logging.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "logging.hpp"

namespace dxil_spv
{
static thread_local LoggingCallback log_callback;
static thread_local void *log_userdata;

void set_thread_log_callback(LoggingCallback callback, void *userdata)
{
    log_callback = callback;
    log_userdata = userdata;
}

LoggingCallback get_thread_log_callback()
{
    return log_callback;
}

void *get_thread_log_callback_userdata()
{
    return log_userdata;
}
}



================================================
FILE: debug/logging.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include <stdio.h>
#include <stdint.h>

#if defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define LOGE_INNER(...) \
    do \
    { \
        fprintf(stderr, "[ERROR]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[ERROR]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)

#define LOGW_INNER(...) \
    do \
    { \
        fprintf(stderr, "[WARN]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[WARN]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)

#define LOGI_INNER(...) \
    do \
    { \
        fprintf(stderr, "[INFO]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[INFO]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)
#elif defined(ANDROID)
#include <android/log.h>
#define LOGE_INNER(...) __android_log_print(ANDROID_LOG_ERROR, "dxil-spirv", __VA_ARGS__)
#define LOGW_INNER(...) __android_log_print(ANDROID_LOG_WARN, "dxil-spirv", __VA_ARGS__)
#define LOGI_INNER(...) __android_log_print(ANDROID_LOG_INFO, "dxil-spirv", __VA_ARGS__)
#else
#define LOGE_INNER(...) \
    do \
    { \
        fprintf(stderr, "[ERROR]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)

#define LOGW_INNER(...) \
    do \
    { \
        fprintf(stderr, "[WARN]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)

#define LOGI_INNER(...) \
    do \
    { \
        fprintf(stderr, "[INFO]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)
#endif

namespace dxil_spv
{
enum class LogLevel : uint32_t
{
    Debug = 0,
    Warn = 1,
    Error = 2
};

using LoggingCallback = void (*)(void *, LogLevel, const char *);
void set_thread_log_callback(LoggingCallback callback, void *userdata);
LoggingCallback get_thread_log_callback();
void *get_thread_log_callback_userdata();
}

#define LOGI(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Debug, buffer); \
    } \
    else \
    { \
        LOGI_INNER(__VA_ARGS__); \
    } \
} while(0)

#define LOGW(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Warn, buffer); \
    } \
    else \
    { \
        LOGW_INNER(__VA_ARGS__); \
    } \
} while(0)

#define LOGE(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Error, buffer); \
    } \
    else \
    { \
        LOGE_INNER(__VA_ARGS__); \
    } \
} while(0)
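A small usage sketch (not from the repository): forwarding dxil-spirv log output into a caller-owned sink via the thread-local callback declared above. The LogSink type and capture_log function are illustrative names; the callback is per-thread, so each worker thread must install its own.

#include "logging.hpp"
#include <string>

struct LogSink
{
    std::string text;
};

static void capture_log(void *userdata, dxil_spv::LogLevel level, const char *message)
{
    // message is the fully formatted string built by the LOGI/LOGW/LOGE macros
    // (callers conventionally include the trailing '\n' themselves).
    auto *sink = static_cast<LogSink *>(userdata);
    if (level != dxil_spv::LogLevel::Debug)
        sink->text += message;
}

void example()
{
    LogSink sink;
    dxil_spv::set_thread_log_callback(capture_log, &sink);
    LOGW("Something suspicious: %d\n", 42); // Routed to capture_log instead of stderr.
    dxil_spv::set_thread_log_callback(nullptr, nullptr); // Restore default stderr logging.
}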


================================================
FILE: descriptor_qa.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "descriptor_qa.hpp"
#include "spirv_module.hpp"
#include "SpvBuilder.h"
#include "logging.hpp"

namespace dxil_spv
{
static spv::Id build_descriptor_qa_heap_buffer_type(spv::Builder &builder)
{
    Vector<spv::Id> member_types;

    // DescriptorHeapQAData {
    //   uint descriptor_count;
    //   uint heap_index;
    //   uvec3 cookies_descriptor_info[];
    // }

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id uvec3_type = builder.makeVectorType(u32_type, 3);
    spv::Id uvec3_arr_type = builder.makeRuntimeArray(uvec3_type);
    builder.addDecoration(uvec3_arr_type, spv::DecorationArrayStride, 12);

    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(uvec3_arr_type);
    spv::Id id = builder.makeStructType(member_types, "DescriptorHeapQAData");

    const auto set_info = [&](DescriptorQAHeapMembers member, int offset, const char *name) {
        builder.addMemberDecoration(id, int(member), spv::DecorationOffset, offset);
        builder.addMemberName(id, int(member), name);
    };

    set_info(DescriptorQAHeapMembers::DescriptorCount, 0, "descriptor_count");
    set_info(DescriptorQAHeapMembers::HeapIndex, 4, "heap_index");
    set_info(DescriptorQAHeapMembers::CookiesDescriptorInfo, 8, "cookies_descriptor_info");
    builder.addDecoration(id, spv::DecorationBlock);
    return id;
}

static spv::Id build_descriptor_global_buffer_type(spv::Builder &builder)
{
    Vector<spv::Id> member_types;

    // DescriptorHeapQAGlobalData {
    //   uvec2 failed_shader_hash;
    //   uint failed_offset;
    //   uint failed_heap;
    //   uint failed_cookie;
    //   uint fault_atomic;
    //   uint failed_instruction;
    //   uint failed_descriptor_type_mask;
    //   uint actual_descriptor_type_mask;
    //   uint fault_type;
    //   uint va_map_timestamp;
    //   uint live_status_table[];
    // }

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id uvec2_type = builder.makeVectorType(u32_type, 2);
    spv::Id u32_arr_type = builder.makeRuntimeArray(u32_type);
    builder.addDecoration(u32_arr_type, spv::DecorationArrayStride, 4);

    member_types.push_back(uvec2_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_arr_type);
    spv::Id id = builder.makeStructType(member_types, "DescriptorHeapGlobalQAData");

    const auto set_info = [&](DescriptorQAGlobalMembers member, int offset, const char *name) {
        builder.addMemberDecoration(id, int(member), spv::DecorationOffset, offset);
        builder.addMemberName(id, int(member), name);
    };

    set_info(DescriptorQAGlobalMembers::FailedShaderHash, 0, "failed_shader_hash");
    set_info(DescriptorQAGlobalMembers::FailedOffset, 8, "failed_offset");
    set_info(DescriptorQAGlobalMembers::FailedHeap, 12, "failed_heap");
    set_info(DescriptorQAGlobalMembers::FailedCookie, 16, "failed_cookie");
    set_info(DescriptorQAGlobalMembers::FaultAtomic, 20, "fault_atomic");
    set_info(DescriptorQAGlobalMembers::FailedInstruction, 24, "failed_instruction");
    set_info(DescriptorQAGlobalMembers::FailedDescriptorTypeMask, 28, "failed_descriptor_type_mask");
    set_info(DescriptorQAGlobalMembers::ActualDescriptorTypeMask, 32, "actual_descriptor_type_mask");
    set_info(DescriptorQAGlobalMembers::FaultType, 36, "fault_type");
    set_info(DescriptorQAGlobalMembers::VAMapTimestamp, 40, "va_map_timestamp");
    set_info(DescriptorQAGlobalMembers::LiveStatusTable, 44, "live_status_table");
    builder.addDecoration(id, spv::DecorationBlock);
    return id;
}

static spv::Id build_ssbo_load(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    auto load = std::make_unique<spv::Instruction>(builder.getUniqueId(), value_type, spv::OpLoad);
    load->addIdOperand(chain->getResultId());
    spv::Id result_id = load->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(load));
    return result_id;
}

static void build_ssbo_store(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member,
                             spv::Id value_id)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    auto store = std::make_unique<spv::Instruction>(spv::OpStore);
    store->addIdOperand(chain->getResultId());
    store->addIdOperand(value_id);
    if (builder.hasCapability(spv::CapabilityVulkanMemoryModel))
        store->addImmediateOperand(spv::MemoryAccessNonPrivatePointerMask);
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(store));
}

static spv::Id build_ssbo_load_array(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member,
                                     spv::Id offset)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    chain->addIdOperand(offset);
    auto load = std::make_unique<spv::Instruction>(builder.getUniqueId(), value_type, spv::OpLoad);
    load->addIdOperand(chain->getResultId());
    spv::Id result_id = load->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(load));
    return result_id;
}

static void build_cookie_descriptor_info_split(spv::Builder &builder, spv::Id composite_id, spv::Id &cookie_id,
                                               spv::Id &cookie_shifted_id, spv::Id &cookie_masked_id,
                                               spv::Id &descriptor_timestamp_id, spv::Id &descriptor_info_id)
{
    spv::Id u32_type = builder.makeUintType(32);

    auto *cookie = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    cookie->addIdOperand(composite_id);
    cookie->addImmediateOperand(0);

    auto *descriptor_timestamp = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    descriptor_timestamp->addIdOperand(composite_id);
    descriptor_timestamp->addImmediateOperand(1);

    auto *descriptor_type = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    descriptor_type->addIdOperand(composite_id);
    descriptor_type->addImmediateOperand(2);

    auto *shifted = builder.addInstruction(u32_type, spv::OpShiftRightLogical);
    shifted->addIdOperand(cookie->getResultId());
    shifted->addIdOperand(builder.makeUintConstant(5));

    auto *masked = builder.addInstruction(u32_type, spv::OpBitwiseAnd);
    masked->addIdOperand(cookie->getResultId());
    masked->addIdOperand(builder.makeUintConstant(31));

    cookie_id = cookie->getResultId();
    descriptor_timestamp_id = descriptor_timestamp->getResultId();
    descriptor_info_id = descriptor_type->getResultId();
    cookie_shifted_id = shifted->getResultId();
    cookie_masked_id = masked->getResultId();
}

static spv::Id build_live_check(spv::Builder &builder, spv::Id status_id, spv::Id bit_id)
{
    spv::Id u32_type = builder.makeUintType(32);

    auto shift_up = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpShiftLeftLogical);
    shift_up->addIdOperand(builder.makeUintConstant(1));
    shift_up->addIdOperand(bit_id);

    auto mask = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpBitwiseAnd);
    mask->addIdOperand(status_id);
    mask->addIdOperand(shift_up->getResultId());

    auto cond = std::make_unique<spv::Instruction>(builder.getUniqueId(), builder.makeBoolType(), spv::OpINotEqual);
    cond->addIdOperand(mask->getResultId());
    cond->addIdOperand(builder.makeUintConstant(0));

    spv::Id res = cond->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(shift_up));
    builder.getBuildPoint()->addInstruction(std::move(mask));
    builder.getBuildPoint()->addInstruction(std::move(cond));
    return res;
}

static spv::Id build_binary_op(spv::Builder &builder, spv::Id type, spv::Op opcode, spv::Id a, spv::Id b)
{
    auto op = std::make_unique<spv::Instruction>(builder.getUniqueId(), type, opcode);
    op->addIdOperand(a);
    op->addIdOperand(b);
    spv::Id ret = op->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(op));
    return ret;
}

static void build_ssbo_barrier(spv::Builder &builder)
{
    auto barrier = std::make_unique<spv::Instruction>(spv::OpMemoryBarrier);
    barrier->addIdOperand(builder.getAtomicDeviceScopeId());
    barrier->addIdOperand(builder.makeUintConstant(spv::MemorySemanticsUniformMemoryMask |
                                                   spv::MemorySemanticsAcquireReleaseMask));
    builder.getBuildPoint()->addInstruction(std::move(barrier));
}

static void build_descriptor_qa_fault_report(SPIRVModule &module, spv::Id &func_id, spv::Id &buffer_id)
{
    auto &builder = module.get_builder();
    spv::Id global_buffer_type_id = build_descriptor_global_buffer_type(builder);
    spv::Id descriptor_qa_global_buffer_id =
        module.create_variable(spv::StorageClassStorageBuffer, global_buffer_type_id, "QAGlobalData");
    buffer_id = descriptor_qa_global_buffer_id;
    builder.addDecoration(descriptor_qa_global_buffer_id, spv::DecorationDescriptorSet,
                          module.get_descriptor_qa_info().global_desc_set);
    builder.addDecoration(descriptor_qa_global_buffer_id, spv::DecorationBinding,
                          module.get_descriptor_qa_info().global_binding);

    auto *current_build_point = builder.getBuildPoint();
    spv::Block *entry = nullptr;
    Vector<spv::Id> param_types(7, builder.makeUintType(32));
    auto *func = builder.makeFunctionEntry(spv::NoPrecision, builder.makeVoidType(), "descriptor_qa_report_fault",
                                           param_types, {}, &entry);
    func_id = func->getId();

    spv::Id fault_type_id = func->getParamId(0);
    spv::Id heap_offset_id = func->getParamId(1);
    spv::Id cookie_id = func->getParamId(2);
    spv::Id heap_id = func->getParamId(3);
    spv::Id descriptor_type_id = func->getParamId(4);
    spv::Id actual_descriptor_type_id = func->getParamId(5);
    spv::Id instruction_id = func->getParamId(6);
    builder.addName(fault_type_id, "fault_type");
    builder.addName(heap_offset_id, "heap_offset");
    builder.addName(cookie_id, "cookie");
    builder.addName(heap_id, "heap_index");
    builder.addName(descriptor_type_id, "descriptor_type");
    builder.addName(actual_descriptor_type_id, "actual_descriptor_type");
    builder.addName(instruction_id, "instruction");

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id u32_ptr_type = builder.makePointer(spv::StorageClassStorageBuffer, u32_type);

    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_ptr_type, spv::OpAccessChain);
    chain->addIdOperand(descriptor_qa_global_buffer_id);
    chain->addIdOperand(builder.makeUintConstant(uint32_t(DescriptorQAGlobalMembers::FaultAtomic)));

    auto increment = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpAtomicIAdd);
    increment->addIdOperand(chain->getResultId());
    increment->addIdOperand(builder.getAtomicDeviceScopeId());
    increment->addIdOperand(builder.makeUintConstant(0));
    increment->addIdOperand(builder.makeUintConstant(1));

    auto check = std::make_unique<spv::Instruction>(builder.getUniqueId(), builder.makeBoolType(), spv::OpIEqual);
    check->addIdOperand(increment->getResultId());
    check->addIdOperand(builder.makeUintConstant(0));
    spv::Id check_id = check->getResultId();

    auto *true_block = new spv::Block(builder.getUniqueId(), *func);
    auto *false_block = new spv::Block(builder.getUniqueId(), *func);

    builder.setBuildPoint(entry);
    entry->addInstruction(std::move(chain));
    entry->addInstruction(std::move(increment));
    entry->addInstruction(std::move(check));
    builder.createSelectionMerge(false_block, 0);
    builder.createConditionalBranch(check_id, true_block, false_block);

    builder.setBuildPoint(true_block);
    {
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedCookie), cookie_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedOffset), heap_offset_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedHeap), heap_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedDescriptorTypeMask), descriptor_type_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::ActualDescriptorTypeMask), actual_descriptor_type_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedInstruction), instruction_id);

        spv::Id uvec2_type = builder.makeVectorType(u32_type, 2);
        Vector<spv::Id> comps;
        comps.push_back(builder.makeUintConstant(uint32_t(module.get_descriptor_qa_info().shader_hash)));
        comps.push_back(builder.makeUintConstant(uint32_t(module.get_descriptor_qa_info().shader_hash >> 32u)));
        spv::Id hash_id = builder.makeCompositeConstant(uvec2_type, comps);
        build_ssbo_store(builder, uvec2_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedShaderHash), hash_id);

        // Device memory barrier here so that if host observed fault_type != 0,
        // we're certain that the other values are correct as well.
        build_ssbo_barrier(builder);

        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FaultType), fault_type_id);
        builder.createBranch(false_block);
    }

    builder.setBuildPoint(false_block);
    builder.makeReturn(false);
    builder.setBuildPoint(current_build_point);
}

spv::Id build_descriptor_qa_check_function(SPIRVModule &module)
{
    auto &builder = module.get_builder();
    spv::Id fault_func_id, global_buffer_id;
    build_descriptor_qa_fault_report(module, fault_func_id, global_buffer_id);

    spv::Id heap_buffer_type_id = build_descriptor_qa_heap_buffer_type(builder);
    spv::Id descriptor_qa_heap_buffer_id =
        module.create_variable(spv::StorageClassStorageBuffer, heap_buffer_type_id, "QAHeapData");
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationDescriptorSet,
                          module.get_descriptor_qa_info().heap_desc_set);
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationBinding,
                          module.get_descriptor_qa_info().heap_binding);
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationNonWritable);
    auto heap_buffer_id = descriptor_qa_heap_buffer_id;

    auto *current_build_point = builder.getBuildPoint();
    spv::Block *entry = nullptr;
    Vector<spv::Id> param_types(3, builder.makeUintType(32));
    auto *func = builder.makeFunctionEntry(spv::NoPrecision, builder.makeUintType(32), "descriptor_qa_check",
                                           param_types, {}, &entry);
    builder.setBuildPoint(entry);

    spv::Id offset_id = func->getParamId(0);
    spv::Id descriptor_type_id = func->getParamId(1);
    spv::Id instruction_id = func->getParamId(2);
    builder.addName(offset_id, "heap_offset");
    builder.addName(descriptor_type_id, "descriptor_type_mask");
    builder.addName(instruction_id, "instruction");

    spv::Id descriptor_count_id = build_ssbo_load(builder, builder.makeUintType(32), heap_buffer_id,
                                                  uint32_t(DescriptorQAHeapMembers::DescriptorCount));
    spv::Id fallback_offset_id = descriptor_count_id;
    spv::Id heap_id = build_ssbo_load(builder, builder.makeUintType(32), heap_buffer_id,
                                      uint32_t(DescriptorQAHeapMembers::HeapIndex));
    spv::Id timestamp_id = build_ssbo_load(builder, builder.makeUintType(32), global_buffer_id,
                                           uint32_t(DescriptorQAGlobalMembers::VAMapTimestamp));
    spv::Id cookie_descriptor_info =
        build_ssbo_load_array(builder, builder.makeVectorType(builder.makeUintType(32), 3), heap_buffer_id,
                              uint32_t(DescriptorQAHeapMembers::CookiesDescriptorInfo), offset_id);

    spv::Id cookie_id;
    spv::Id cookie_shifted_id;
    spv::Id cookie_mask_id;
    spv::Id descriptor_timestamp_id;
    spv::Id descriptor_info_id;
    build_cookie_descriptor_info_split(builder, cookie_descriptor_info, cookie_id, cookie_shifted_id,
                                       cookie_mask_id, descriptor_timestamp_id, descriptor_info_id);

    spv::Id live_status_id = build_ssbo_load_array(builder, builder.makeUintType(32), global_buffer_id,
                                                   uint32_t(DescriptorQAGlobalMembers::LiveStatusTable),
                                                   cookie_shifted_id);
    spv::Id live_status_cond_id = build_live_check(builder, live_status_id, cookie_mask_id);

    spv::Id type_cond_id = build_binary_op(builder, builder.makeUintType(32), spv::OpBitwiseAnd,
                                           descriptor_info_id, descriptor_type_id);
    type_cond_id = build_binary_op(builder, builder.makeBoolType(), spv::OpIEqual, type_cond_id, descriptor_type_id);

    spv::Id out_of_range_id = build_binary_op(builder, builder.makeBoolType(), spv::OpUGreaterThanEqual,
                                              offset_id, descriptor_count_id);

    // First check: descriptor index is in range of heap.
    auto *range_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    range_check->addIdOperand(out_of_range_id);
    range_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT));
    range_check->addIdOperand(builder.makeUintConstant(0u));

    // Second: Check if type matches.
    auto *type_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    type_check->addIdOperand(type_cond_id);
    type_check->addIdOperand(builder.makeUintConstant(0u));
    type_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT));

    // Third: Check if cookie is alive.
    auto *alive_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    alive_check->addIdOperand(live_status_cond_id);
    alive_check->addIdOperand(builder.makeUintConstant(0u));
    alive_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT));

    // Fourth: Check if the view was created before GPU submission happened.
    auto *time_check_cond = builder.addInstruction(builder.makeBoolType(), spv::OpUGreaterThanEqual);
    time_check_cond->addIdOperand(timestamp_id);
    time_check_cond->addIdOperand(descriptor_timestamp_id);

    auto *time_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    time_check->addIdOperand(time_check_cond->getResultId());
    time_check->addIdOperand(builder.makeUintConstant(0u));
    time_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT));

    auto *merge_check0 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    auto *merge_check1 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    auto *merge_check2 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    merge_check0->addIdOperand(range_check->getResultId());
    merge_check0->addIdOperand(type_check->getResultId());
    merge_check1->addIdOperand(merge_check0->getResultId());
    merge_check1->addIdOperand(alive_check->getResultId());
    merge_check2->addIdOperand(merge_check1->getResultId());
    merge_check2->addIdOperand(time_check->getResultId());

    auto *fault_cond = builder.addInstruction(builder.makeBoolType(), spv::OpINotEqual);
    fault_cond->addIdOperand(merge_check2->getResultId());
    fault_cond->addIdOperand(builder.makeUintConstant(0u));
    spv::Id fault_type_id = merge_check2->getResultId();
    spv::Id fault_cond_id = fault_cond->getResultId();

    auto *fault_block = new spv::Block(builder.getUniqueId(), *func);
    auto *correct_block = new spv::Block(builder.getUniqueId(), *func);
    builder.createSelectionMerge(correct_block, 0);
    builder.createConditionalBranch(fault_cond_id, fault_block, correct_block);

    {
        builder.setBuildPoint(fault_block);
        auto *call = builder.addInstruction(builder.makeVoidType(), spv::OpFunctionCall);
        call->addIdOperand(fault_func_id);
        call->addIdOperand(fault_type_id);
        call->addIdOperand(offset_id);
        call->addIdOperand(cookie_id);
        call->addIdOperand(heap_id);
        call->addIdOperand(descriptor_type_id);
        call->addIdOperand(descriptor_info_id);
        call->addIdOperand(instruction_id);
        builder.makeReturn(false, fallback_offset_id);
    }

    builder.setBuildPoint(correct_block);
    builder.makeReturn(false, offset_id);
    builder.setBuildPoint(current_build_point);
    return func->getId();
}
}
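As a host-side sketch (not part of the repository), the two SSBO layouts emitted above can be mirrored in C++; the struct names are illustrative, the offsets are exactly the spv::DecorationOffset values written by the two type builders, and mapping the uvec2 failed_shader_hash to a uint64_t assumes a little-endian host:

#include <cstdint>
#include <cstddef>

struct DescriptorHeapQADataHost
{
    uint32_t descriptor_count; // offset 0
    uint32_t heap_index;       // offset 4
    // uvec3 cookies_descriptor_info[]; // offset 8, ArrayStride 12 (cookie, timestamp, type mask)
};

struct DescriptorHeapGlobalQADataHost
{
    uint64_t failed_shader_hash;          // offset 0 (uvec2 in the shader)
    uint32_t failed_offset;               // offset 8
    uint32_t failed_heap;                 // offset 12
    uint32_t failed_cookie;               // offset 16
    uint32_t fault_atomic;                // offset 20
    uint32_t failed_instruction;          // offset 24
    uint32_t failed_descriptor_type_mask; // offset 28
    uint32_t actual_descriptor_type_mask; // offset 32
    uint32_t fault_type;                  // offset 36
    uint32_t va_map_timestamp;            // offset 40
    // uint32_t live_status_table[];      // offset 44, ArrayStride 4 (1 bit per cookie, see build_live_check)
};

static_assert(offsetof(DescriptorHeapGlobalQADataHost, fault_type) == 36, "Must match SPIR-V decorations.");
static_assert(offsetof(DescriptorHeapGlobalQADataHost, va_map_timestamp) == 40, "Must match SPIR-V decorations.");

Note that placing a runtime array of uvec3 at stride 12 relies on scalar block layout, which is also why the sandbox validator below calls SetScalarBlockLayout(true).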

================================================
FILE: descriptor_qa.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include <stdint.h>
#include "spirv.hpp"

namespace dxil_spv
{
static constexpr uint32_t Version = 2;

struct DescriptorQAInfo
{
    uint32_t version = 0;
    uint32_t global_desc_set = 0;
    uint32_t global_binding = 0;
    uint32_t heap_desc_set = 0;
    uint32_t heap_binding = 0;
    uint64_t shader_hash = 0;
};

enum class InstructionInstrumentationType
{
    FullNanInf = 0,
    // Only instrument writes to externally visible memory, etc.
    // Gets rid of potential false positives.
    ExternallyVisibleWriteNanInf = 1,
    // Flushes all NaNs to zero.
    // Useful when trying to figure out where a NaN is first generated.
    FlushNaNToZero = 2,
    // Adds assertions with OpAssumeTrueKHR. For now, these are resolved internally,
    // but we could add a mode that forwards them to the driver.
    ExpectAssume = 3,
    BufferSynchronizationValidation = 4
};

struct InstructionInstrumentationInfo
{
    uint32_t version = 0;
    bool enabled = false;
    bool fp16 = false;
    bool fp32 = false;
    bool fp64 = false;
    uint32_t control_desc_set = 0;
    uint32_t control_binding = 0;
    uint32_t payload_desc_set = 0;
    uint32_t payload_binding = 0;
    uint64_t shader_hash = 0;
    InstructionInstrumentationType type = {};
};

struct InstructionInstrumentationState
{
    uint32_t instruction_count = 0;
    spv::Id nan_inf_instrument_fp16_call_id = 0;
    spv::Id nan_inf_instrument_fp32_call_id = 0;
    spv::Id nan_inf_instrument_fp64_call_id = 0;
    spv::Id assume_true_call_id = 0;
    spv::Id should_report_instrumentation_id = 0;
    spv::Id global_nan_inf_control_var_id = 0;
    spv::Id global_nan_inf_data_var_id = 0;
    InstructionInstrumentationInfo info = {};
};

enum DescriptorQATypeFlagBits
{
    DESCRIPTOR_QA_TYPE_NONE_BIT = 0,
    DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT = 1 << 0,
    DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT = 1 << 1,
    DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT = 1 << 2,
    DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT = 1 << 3,
    DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT = 1 << 4,
    DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT = 1 << 5,
    DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT = 1 << 6,
    DESCRIPTOR_QA_TYPE_SAMPLER_BIT = 1 << 7,
    DESCRIPTOR_QA_TYPE_RAW_VA_BIT = 1 << 8
};
using DescriptorQATypeFlags = uint32_t;

enum class DescriptorQAGlobalMembers
{
    FailedShaderHash = 0,
    FailedOffset,
    FailedHeap,
    FailedCookie,
    FaultAtomic,
    FailedInstruction,
    FailedDescriptorTypeMask,
    ActualDescriptorTypeMask,
    FaultType,
    VAMapTimestamp,
    LiveStatusTable
};

enum DescriptorQAFaultTypeBits
{
    DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT = 1 << 0,
    DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT = 1 << 1,
    DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT = 1 << 2,
    DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT = 1 << 3
};

enum class DescriptorQAHeapMembers
{
    DescriptorCount = 0,
    HeapIndex,
    CookiesDescriptorInfo
};

class SPIRVModule;
spv::Id build_descriptor_qa_check_function(SPIRVModule &module);
}
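A usage sketch (not repository code) for the fault bits above: a host-side tool that has read back DescriptorHeapGlobalQAData can decode fault_type like this. The function name is illustrative.

#include "descriptor_qa.hpp"
#include <cstdio>
#include <cstdint>

static void report_descriptor_fault(uint32_t fault_type)
{
    using namespace dxil_spv;
    if (fault_type & DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT)
        printf("Descriptor index was out of range of the heap.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT)
        printf("Descriptor type mask did not match the access.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT)
        printf("Resource cookie is no longer live.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT)
        printf("View was created after the GPU submission (VA timestamp check).\n");
}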


================================================
FILE: dxbc_spirv_sandbox.cpp
================================================
/* Copyright (c) 2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ir/ir.h"
#include "ir/ir_builder.h"
#include "dxil_converter.hpp"
#include "module.hpp"
#include "api/test_api.h"
#include "context.hpp"
#include "thread_local_allocator.hpp"
#include "cfg_structurizer.hpp"
#include "logging.hpp"
#include "spirv_cross_c.h"
#include "spirv-tools/libspirv.hpp"

using namespace dxil_spv;
using namespace dxbc_spv;

struct Remapper : ResourceRemappingInterface
{
    bool remap_srv(const D3DBinding &d3d_binding, VulkanSRVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer_binding.descriptor_set = d3d_binding.register_space;
        vulkan_binding.buffer_binding.binding = d3d_binding.register_index;
        if (d3d_binding.kind == DXIL::ResourceKind::StructuredBuffer ||
            d3d_binding.kind == DXIL::ResourceKind::RawBuffer)
            vulkan_binding.buffer_binding.descriptor_type = VulkanDescriptorType::SSBO;
        return true;
    }

    bool remap_sampler(const D3DBinding &d3d_binding, VulkanBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.descriptor_set = d3d_binding.register_space;
        vulkan_binding.binding = d3d_binding.register_index;
        return true;
    }

    bool remap_uav(const D3DUAVBinding &d3d_binding, VulkanUAVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer_binding.descriptor_set = d3d_binding.binding.register_space;
        vulkan_binding.buffer_binding.binding = d3d_binding.binding.register_index;
        if (d3d_binding.binding.kind == DXIL::ResourceKind::StructuredBuffer ||
            d3d_binding.binding.kind == DXIL::ResourceKind::RawBuffer)
            vulkan_binding.buffer_binding.descriptor_type = VulkanDescriptorType::SSBO;
        if (d3d_binding.counter)
        {
            vulkan_binding.counter_binding.descriptor_set = d3d_binding.binding.register_space;
            vulkan_binding.counter_binding.binding = d3d_binding.binding.register_index;
            vulkan_binding.counter_binding.descriptor_type = VulkanDescriptorType::TexelBuffer;
        }
        return true;
    }

    bool remap_cbv(const D3DBinding &d3d_binding, VulkanCBVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer.descriptor_set = d3d_binding.register_space;
        vulkan_binding.buffer.binding = d3d_binding.register_index;
        return true;
    }

    bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vulkan_location) override
    {
        vulkan_location = {};
        vulkan_location.location = d3d_input.start_row;
        return true;
    }

    bool remap_stream_output(const D3DStreamOutput &, VulkanStreamOutput &vk_output) override
    {
        vk_output = {};
        return true;
    }

    bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
    {
        return true;
    }

    bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) override
    {
        return true;
    }

    unsigned get_root_constant_word_count() override
    {
        return 0;
    }

    unsigned get_root_descriptor_count() override
    {
        return 0;
    }

    bool has_nontrivial_stage_input_remapping() override
    {
        return false;
    }
};

static std::string convert_to_asm(const void *code, size_t size)
{
    spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
    tools.SetMessageConsumer([](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
        LOGE("SPIRV-Tools message: %s\n", message);
    });
    std::string str;
    if (!tools.Disassemble(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), &str,
                           SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES | SPV_BINARY_TO_TEXT_OPTION_INDENT |
                           SPV_BINARY_TO_TEXT_OPTION_NESTED_INDENT))
        return "";
    else
        return str;
}

static bool validate_spirv(const void *code, size_t size)
{
    spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
    bool expected_failure = false;
    bool unexpected_failure = false;
    tools.SetMessageConsumer([&](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
        if (strstr(message, "08721") || strstr(message, "08722"))
        {
            LOGW("SPIRV-Tools message expected failure: %s\n", message);
            expected_failure = true;
        }
        else
        {
            LOGE("SPIRV-Tools message: %s\n", message);
            unexpected_failure = true;
        }
    });
    spvtools::ValidatorOptions opts;
    opts.SetScalarBlockLayout(true);
    return tools.Validate(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), opts) ||
           (expected_failure && !unexpected_failure);
}

static std::string convert_to_glsl(const void *code, size_t size)
{
    std::string ret;
    spvc_context context;
    if (spvc_context_create(&context) != SPVC_SUCCESS)
        return ret;

    spvc_parsed_ir ir;
    if (spvc_context_parse_spirv(context, static_cast<const SpvId *>(code), size / sizeof(uint32_t), &ir) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler compiler;
    if (spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
                                     &compiler) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler_options opts;
    if (spvc_compiler_create_compiler_options(compiler, &opts) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE);
    spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_GLSL_VERSION, 460);
    spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS, SPVC_TRUE);
    spvc_compiler_install_compiler_options(compiler, opts);

    const char *source;
    if (spvc_compiler_compile(compiler, &source) != SPVC_SUCCESS)
        goto cleanup;
    ret = source;

cleanup:
    spvc_context_destroy(context);
    return ret;
}

static Vector<uint32_t> run_test(const char *name, ir::Builder &builder)
{
    LOGI("Testing %s ...\n", name);
    LLVMBCParser parser;
    if (!parser.parseDXBC(builder))
    {
        LOGE("Failed to parse.\n");
        return {};
    }

    SPIRVModule module;
    Converter converter(parser, nullptr, module);
    Remapper remapper;

    OptionSSBOAlignment align;
    align.alignment = 1;
    converter.add_option(align);

    OptionShaderDemoteToHelper demote;
    demote.supported = true;
    converter.add_option(demote);

#if 1
    OptionMinPrecisionNative16Bit native_16bit;
    native_16bit.enabled = true;
    converter.add_option(native_16bit);
#endif

    converter.set_resource_remapping_interface(&remapper);
    auto entry = converter.convert_entry_point();
    if (!entry.entry.entry)
    {
        LOGE("Failed to convert function.\n");
        return {};
    }

    {
        CFGStructurizer structurizer(entry.entry.entry, *entry.node_pool, module);
        if (entry.entry.is_structured)
            structurizer.run_trivial();
        else
            structurizer.run();
        module.emit_entry_point_function_body(structurizer);
    }

    for (auto &leaf : entry.leaf_functions)
    {
        if (!leaf.entry)
        {
            LOGE("Leaf function is nullptr!\n");
            return {};
        }
        CFGStructurizer structurizer(leaf.entry, *entry.node_pool, module);
        module.set_entry_build_point(leaf.func);
        if (leaf.is_structured)
            structurizer.run_trivial();
        else
            structurizer.run();
        module.emit_leaf_function_body(leaf.func, structurizer);
    }

    Vector<uint32_t> spirv;
    if (!module.finalize_spirv(spirv))
    {
        LOGE("Failed to finalize SPIR-V.\n");
        return {};
    }

#if 1
    if (!validate_spirv(spirv.data(), spirv.size() * sizeof(uint32_t)))
    {
        LOGE("Failed to validate SPIR-V.\n");
        return {};
    }
#endif
    return spirv;
}

int main(int argc, char **argv)
{
    auto tests = test_api::enumerateTests(nullptr);
    for (auto &test : tests)
    {
#if 0
        if (test.name != "test_arithmetic_fp32_special")
            continue;
#endif
        begin_thread_allocator_context();
        {
            auto spirv = run_test(test.name.c_str(), test.builder);
            if (spirv.empty())
            {
                LOGE("Failure to convert test to SPIR-V!\n");
                return EXIT_FAILURE;
            }

            auto disasm = convert_to_asm(spirv.data(), spirv.size() * sizeof(uint32_t));
            auto glsl = convert_to_glsl(spirv.data(), spirv.size() * sizeof(uint32_t));

            FILE *file_asm = nullptr;
            FILE *file_glsl = nullptr;
            if (argc == 2)
            {
                std::string path = argv[1];
                path += '/';
                path += test.name;
                auto path_asm = path + ".asm";
                auto path_glsl = path + ".glsl";
                file_asm = fopen(path_asm.c_str(), "w");
                file_glsl = fopen(path_glsl.c_str(), "w");
                if (!file_asm || !file_glsl)
                {
                    LOGE("Failed to open file \"%s\" and \"%s\"\n", path_asm.c_str(), path_glsl.c_str());
                    return EXIT_FAILURE;
                }
            }

            if (file_asm && file_glsl)
            {
                fprintf(file_asm, "SPIR-V:\n%s\n", disasm.c_str());
                fprintf(file_glsl, "GLSL:\n%s\n", glsl.c_str());
                fclose(file_asm);
                fclose(file_glsl);
            }
            else
            {
                LOGI("SPIR-V:\n%s\n", disasm.c_str());
                LOGI("GLSL:\n%s\n", glsl.c_str());
            }
        }
        end_thread_allocator_context();
    }
}
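main() above brackets every test in begin_thread_allocator_context() / end_thread_allocator_context(). As a small hedged sketch (not repository code), that pairing can be wrapped in an RAII guard so early returns cannot leak the thread-local allocator context:

#include "thread_local_allocator.hpp"

// Hypothetical helper; the repository calls the begin/end functions directly.
struct ThreadAllocatorScope
{
    ThreadAllocatorScope() { dxil_spv::begin_thread_allocator_context(); }
    ~ThreadAllocatorScope() { dxil_spv::end_thread_allocator_context(); }
    ThreadAllocatorScope(const ThreadAllocatorScope &) = delete;
    ThreadAllocatorScope &operator=(const ThreadAllocatorScope &) = delete;
};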
rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import sys import os import os.path import subprocess import argparse import tempfile import re import multiprocessing def disasm_shader_regex(input_file, args, regex): f, path = tempfile.mkstemp(suffix = 'dxil') f2, path2 = tempfile.mkstemp(suffix = 'dxil2') os.close(f) os.close(f2) result = None try: dxil_extract_cmd = [args.dxil_extract, input_file, '--output'] p = subprocess.Popen(dxil_extract_cmd + [path, '--verbose'], stdout = subprocess.PIPE) subprocess.check_call(dxil_extract_cmd + [path2, '--reflection'], stdout = subprocess.DEVNULL) llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout'] main_pipe = subprocess.Popen(llvm_dis_cmd + [path], stdout = subprocess.PIPE) refl_pipe = subprocess.Popen(llvm_dis_cmd + [path2], stdout = subprocess.PIPE) lines_main = main_pipe.communicate()[0].decode() lines_refl = refl_pipe.communicate()[0].decode() if args.isolate: allow = re.search(regex, lines_main + lines_refl) else: allow = True if allow: result = p.communicate()[0].decode() result += ' DXIL:\n' for line in lines_main.splitlines(): if re.search(regex, line): result += ' ' + line + '\n' result += ' STAT:\n' for line in lines_refl.splitlines(): if re.search(regex, line): result += ' ' + line + '\n' except: pass os.remove(path) os.remove(path2) return result def disasm_shader_plain(input_file, args, regex): f, path = tempfile.mkstemp(suffix = 'dxil') result = '' try: dxil_extract_cmd = [args.dxil_extract, '--verbose', input_file, '--output', path] if args.reflect: dxil_extract_cmd.append('--reflection') p = subprocess.Popen(dxil_extract_cmd, stdout = subprocess.PIPE) result += p.communicate()[0].decode() llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout', path] p = subprocess.Popen(llvm_dis_cmd, stdout = subprocess.PIPE) result += p.communicate()[0].decode() except: pass os.remove(path) return result def main(): parser = argparse.ArgumentParser(description = 'Script for disassembling DXIL.') parser.add_argument('input', help = 'File or folder containing shader files to convert.') parser.add_argument('--output', help = 'Path where LLVM asm is output.', default = '/dev/stdout') parser.add_argument('--dxil-extract', help = 'Path to dxil-extract', default = 'dxil-extract') parser.add_argument('--llvm-dis', help = 'Path to llvm-dis', default = 'llvm-dis') parser.add_argument('--reflect', action = 'store_true', help = 'Use reflection section') parser.add_argument('--isolate', action = 'store_true', help = 'Isolate regex output to hits only') parser.add_argument('--symbol-regex', type = str, help = 'Grep disassemblies for a symbol') args = parser.parse_args() if not args.input: sys.stderr.write('Need input shader.\n') sys.exit(1) if not args.dxil_extract: sys.stderr.write('Need dxil-extract 
def disasm_shader_plain(input_file, args, regex):
    f, path = tempfile.mkstemp(suffix = 'dxil')
    os.close(f)  # Only the path is needed; close the fd so it does not leak.
    result = ''
    try:
        dxil_extract_cmd = [args.dxil_extract, '--verbose', input_file, '--output', path]
        if args.reflect:
            dxil_extract_cmd.append('--reflection')
        p = subprocess.Popen(dxil_extract_cmd, stdout = subprocess.PIPE)
        result += p.communicate()[0].decode()
        llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout', path]
        p = subprocess.Popen(llvm_dis_cmd, stdout = subprocess.PIPE)
        result += p.communicate()[0].decode()
    except Exception:
        pass
    os.remove(path)
    return result

def main():
    parser = argparse.ArgumentParser(description = 'Script for disassembling DXIL.')
    parser.add_argument('input', help = 'File or folder containing shader files to convert.')
    parser.add_argument('--output', help = 'Path where LLVM asm is output.', default = '/dev/stdout')
    parser.add_argument('--dxil-extract', help = 'Path to dxil-extract', default = 'dxil-extract')
    parser.add_argument('--llvm-dis', help = 'Path to llvm-dis', default = 'llvm-dis')
    parser.add_argument('--reflect', action = 'store_true', help = 'Use reflection section')
    parser.add_argument('--isolate', action = 'store_true', help = 'Isolate regex output to hits only')
    parser.add_argument('--symbol-regex', type = str, help = 'Grep disassemblies for a symbol')
    args = parser.parse_args()

    if not args.input:
        sys.stderr.write('Need input shader.\n')
        sys.exit(1)
    if not args.dxil_extract:
        sys.stderr.write('Need dxil-extract path.\n')
        sys.exit(1)

    if args.symbol_regex:
        regex = re.compile(args.symbol_regex)
    else:
        regex = None

    if os.path.isfile(args.input):
        files = [args.input]
    else:
        files = []
        for file in os.scandir(args.input):
            if os.path.splitext(file.name)[1] == '.dxil':
                files.append(os.path.join(args.input, file.name))

    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    results = []
    counter = 0
    with open(args.output, 'w') as f:
        for input_file in files:
            results.append(pool.apply_async(disasm_shader_regex if regex else disasm_shader_plain,
                                            args = (input_file, args, regex)))
        for res in results:
            lines = res.get()
            counter += 1
            print('Progress {} / {}'.format(counter, len(files)))
            if lines is not None:
                f.writelines(lines)
                f.writelines('\n\n')

if __name__ == '__main__':
    main()

================================================
FILE: dxil.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include <stdint.h> // Fixed-width types used throughout (the original include targets were lost in extraction).
#include <stddef.h>

namespace DXIL
{
constexpr size_t ContainerHashSize = 16;

struct ContainerHeader
{
    uint32_t header_fourcc;
    uint8_t digest[ContainerHashSize];
    uint16_t major_version;
    uint16_t minor_version;
    uint32_t container_size_in_bytes;
    uint32_t part_count;
};

struct PartHeader
{
    uint32_t part_fourcc;
    uint32_t part_size;
};

struct ProgramHeader
{
    uint32_t program_version;
    uint32_t size_in_uint32;
    uint32_t dxil_magic;
    uint32_t dxil_version;
    uint32_t bitcode_offset;
    uint32_t bitcode_size;
};

struct IOElement
{
    dxil_spv::String semantic_name;
    uint32_t stream_index;
    uint32_t semantic_index;
    uint32_t system_value_semantic;
    uint32_t component_type;
    uint32_t register_index;
    uint32_t mask;
    uint32_t min_precision;
};

constexpr uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return a | (b << 8) | (c << 16) | (d << 24);
}

enum class FourCC : uint32_t
{
    Container = fourcc('D', 'X', 'B', 'C'),
    ResourceDef = fourcc('R', 'D', 'E', 'F'),
    InputSignature = fourcc('I', 'S', 'G', '1'),
    OutputSignature = fourcc('O', 'S', 'G', '1'),
    PatchConstantSignature = fourcc('P', 'S', 'G', '1'),
    ShaderStatistics = fourcc('S', 'T', 'A', 'T'),
    ShaderDebugInfoDXIL = fourcc('I', 'L', 'D', 'B'),
    ShaderDebugName = fourcc('I', 'L', 'D', 'N'),
    FeatureInfo = fourcc('S', 'F', 'I', '0'),
    PrivateData = fourcc('P', 'R', 'I', 'V'),
    RootSignature = fourcc('R', 'T', 'S', '0'),
    DXIL = fourcc('D', 'X', 'I', 'L'),
    SHDR = fourcc('S', 'H', 'D', 'R'),
    SHEX = fourcc('S', 'H', 'E', 'X'),
    PipelineStateValidation = fourcc('P', 'S', 'V', '0'),
    RuntimeData = fourcc('R', 'D', 'A', 'T'),
    ShaderHash = fourcc('H', 'A', 'S', 'H')
};

enum class RuntimeDataPartType : uint32_t
{
    Invalid = 0,
    StringBuffer = 1,
    IndexArrays = 2,
    ResourceTable = 3,
    FunctionTable = 4,
    RawBytes = 5,
    SubobjectTable = 6
};

enum class SubobjectKind : uint32_t
{
    StateObjectConfig = 0,
    GlobalRootSignature = 1,
    LocalRootSignature = 2,
    SubobjectToExportsAssociation = 8,
    RaytracingShaderConfig = 9,
    RaytracingPipelineConfig = 10,
    HitGroup = 11,
    RaytracingPipelineConfig1 = 12
};

enum class HitGroupType : uint32_t
{
    Triangle = 0,
    Procedural = 1
};

enum class ComponentType : uint8_t
{
    Invalid = 0,
    I1, I16, U16, I32, U32, I64, U64,
    F16, F32, F64,
    SNormF16, UNormF16, SNormF32, UNormF32, SNormF64, UNormF64,
    InternalU8 = 0xff // Doesn't exist, but dummy value to signal 8-bit SSBO
};

enum class InterpolationMode : uint8_t
{
    Undefined,
    Constant,
    Linear,
    LinearCentroid,
    LinearNoperspective,
    LinearNoperspectiveCentroid,
    LinearSample,
    LinearNoperspectiveSample,
    Invalid
};

enum class Semantic : uint8_t
{
    User, VertexID, InstanceID, Position, RenderTargetArrayIndex, ViewPortArrayIndex,
    ClipDistance, CullDistance, OutputControlPointID, DomainLocation, PrimitiveID,
    GSInstanceID, SampleIndex, IsFrontFace, Coverage, InnerCoverage, Target, Depth,
    DepthLessEqual, DepthGreaterEqual, StencilRef, DispatchThreadID, GroupID, GroupIndex,
    GroupThreadID, TessFactor, InsideTessFactor, ViewID, Barycentrics, ShadingRate,
    CullPrimitive,
    // Fake semantics to disambiguate semantics based on interpolation flags.
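    // (255 sits well outside the range of real DXIL semantics, so the internal
    // no-perspective barycentrics variant can never collide with a real one.)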
    InternalBarycentricsNoPerspective = 255
};

enum class ResourceType : uint8_t
{
    SRV = 0,
    UAV = 1,
    CBV = 2,
    Sampler = 3
};

enum class ResourceKind : uint8_t
{
    Invalid = 0,
    Texture1D, Texture2D, Texture2DMS, Texture3D, TextureCube,
    Texture1DArray, Texture2DArray, Texture2DMSArray, TextureCubeArray,
    TypedBuffer, RawBuffer, StructuredBuffer, CBuffer, Sampler, TBuffer,
    RTAccelerationStructure, FeedbackTexture2D, FeedbackTexture2DArray
};

enum class Op : unsigned
{
    // Input output
    TempRegLoad = 0, TempRegStore = 1, MinPrecXRegLoad = 2, MinPrecXRegStore = 3,
    LoadInput = 4, StoreOutput = 5,
    FAbs = 6, Saturate = 7, IsNan = 8, IsInf = 9, IsFinite = 10, IsNormal = 11,
    Cos = 12, Sin = 13, Tan = 14, Acos = 15, Asin = 16, Atan = 17,
    Hcos = 18, Hsin = 19, Htan = 20,
    Exp = 21, Frc = 22, Log = 23, Sqrt = 24, Rsqrt = 25,
    Round_ne = 26, Round_ni = 27, Round_pi = 28, Round_z = 29,
    Bfrev = 30, Countbits = 31, FirstbitLo = 32, FirstbitHi = 33, FirstbitSHi = 34,
    FMax = 35, FMin = 36, IMax = 37, IMin = 38, UMax = 39, UMin = 40,
    IMul = 41, UMul = 42, UDiv = 43, UAddc = 44, USubb = 45,
    FMad = 46, Fma = 47, IMad = 48, UMad = 49, Msad = 50,
    Ibfe = 51, Ubfe = 52, Bfi = 53,
    Dot2 = 54, Dot3 = 55, Dot4 = 56,
    CreateHandle = 57, CBufferLoad = 58, CBufferLoadLegacy = 59,
    Sample = 60, SampleBias = 61, SampleLevel = 62, SampleGrad = 63,
    SampleCmp = 64, SampleCmpLevelZero = 65,
    TextureLoad = 66, TextureStore = 67,
    BufferLoad = 68, BufferStore = 69, BufferUpdateCounter = 70,
    CheckAccessFullyMapped = 71, GetDimensions = 72,
    TextureGather = 73, TextureGatherCmp = 74,
    Texture2DMSGetSamplePosition = 75, RenderTargetGetSamplePosition = 76, RenderTargetGetSampleCount = 77,
    AtomicBinOp = 78, AtomicCompareExchange = 79, Barrier = 80,
    CalculateLOD = 81, Discard = 82,
    DerivCoarseX = 83, DerivCoarseY = 84, DerivFineX = 85, DerivFineY = 86,
    EvalSnapped = 87, EvalSampleIndex = 88, EvalCentroid = 89,
    SampleIndex = 90, Coverage = 91, InnerCoverage = 92,
    ThreadId = 93, GroupId = 94, ThreadIdInGroup = 95, FlattenedThreadIdInGroup = 96,
    EmitStream = 97, CutStream = 98, EmitThenCutStream = 99, GSInstanceID = 100,
    MakeDouble = 101, SplitDouble = 102,
    LoadOutputControlPoint = 103, LoadPatchConstant = 104, DomainLocation = 105,
    StorePatchConstant = 106, OutputControlPointID = 107, PrimitiveID = 108,
    CycleCounterLegacy = 109,
    WaveIsFirstLane = 110, WaveGetLaneIndex = 111, WaveGetLaneCount = 112,
    WaveAnyTrue = 113, WaveAllTrue = 114, WaveActiveAllEqual = 115, WaveActiveBallot = 116,
    WaveReadLaneAt = 117, WaveReadLaneFirst = 118,
    WaveActiveOp = 119, WaveActiveBit = 120, WavePrefixOp = 121,
    QuadReadLaneAt = 122, QuadOp = 123,
    BitcastI16toF16 = 124, BitcastF16toI16 = 125, BitcastI32toF32 = 126, BitcastF32toI32 = 127,
    BitcastI64toF64 = 128, BitcastF64toI64 = 129,
    LegacyF32ToF16 = 130, LegacyF16ToF32 = 131,
    LegacyDoubleToFloat = 132, LegacyDoubleToSInt32 = 133, LegacyDoubleToUInt32 = 134,
    WaveAllBitCount = 135, WavePrefixBitCount = 136,
    AttributeAtVertex = 137, ViewID = 138,
    RawBufferLoad = 139, RawBufferStore = 140,
    InstanceID = 141, InstanceIndex = 142, HitKind = 143, RayFlags = 144,
    DispatchRaysIndex = 145, DispatchRaysDimensions = 146,
    WorldRayOrigin = 147, WorldRayDirection = 148,
    ObjectRayOrigin = 149, ObjectRayDirection = 150,
    ObjectToWorld = 151, WorldToObject = 152,
    RayTMin = 153, RayTCurrent = 154,
    IgnoreHit = 155, AcceptHitAndEndSearch = 156,
    TraceRay = 157, ReportHit = 158, CallShader = 159,
    CreateHandleForLib = 160, PrimitiveIndex = 161,
    Dot2AddHalf = 162, Dot4AddI8Packed = 163, Dot4AddU8Packed = 164,
    WaveMatch = 165, WaveMultiPrefixOp = 166,
    WaveMultiPrefixBitCount = 167,
    SetMeshOutputCounts = 168, EmitIndices = 169, GetMeshPayload = 170,
    StoreVertexOutput = 171, StorePrimitiveOutput = 172, DispatchMesh = 173,
    WriteSamplerFeedback = 174, WriteSamplerFeedbackBias = 175,
    WriteSamplerFeedbackLevel = 176, WriteSamplerFeedbackGrad = 177,
    AllocateRayQuery = 178, RayQuery_TraceRayInline = 179, RayQuery_Proceed = 180, RayQuery_Abort = 181,
    RayQuery_CommitNonOpaqueTriangleHit = 182, RayQuery_CommitProceduralPrimitiveHit = 183,
    RayQuery_CommittedStatus = 184, RayQuery_CandidateType = 185,
    RayQuery_CandidateObjectToWorld3x4 = 186, RayQuery_CandidateWorldToObject3x4 = 187,
    RayQuery_CommittedObjectToWorld3x4 = 188, RayQuery_CommittedWorldToObject3x4 = 189,
    RayQuery_CandidateProceduralPrimitiveNonOpaque = 190,
    RayQuery_CandidateTriangleFrontFace = 191, RayQuery_CommittedTriangleFrontFace = 192,
    RayQuery_CandidateTriangleBarycentrics = 193, RayQuery_CommittedTriangleBarycentrics = 194,
    RayQuery_RayFlags = 195, RayQuery_WorldRayOrigin = 196, RayQuery_WorldRayDirection = 197,
    RayQuery_RayTMin = 198, RayQuery_CandidateTriangleRayT = 199, RayQuery_CommittedRayT = 200,
    RayQuery_CandidateInstanceIndex = 201, RayQuery_CandidateInstanceID = 202,
    RayQuery_CandidateGeometryIndex = 203, RayQuery_CandidatePrimitiveIndex = 204,
    RayQuery_CandidateObjectRayOrigin = 205, RayQuery_CandidateObjectRayDirection = 206,
    RayQuery_CommittedInstanceIndex = 207, RayQuery_CommittedInstanceID = 208,
    RayQuery_CommittedGeometryIndex = 209, RayQuery_CommittedPrimitiveIndex = 210,
    RayQuery_CommittedObjectRayOrigin = 211, RayQuery_CommittedObjectRayDirection = 212,
    RayQuery_GeometryIndex = 213,
    RayQuery_CandidateInstanceContributionToHitGroupIndex = 214,
    RayQuery_CommittedInstanceContributionToHitGroupIndex = 215,
    AnnotateHandle = 216, CreateHandleFromBinding = 217, CreateHandleFromHeap = 218,
    Unpack4x8 = 219, Pack4x8 = 220, IsHelperLane = 221, QuadVote = 222,
    TextureGatherRaw = 223, SampleCmpLevel = 224, TextureStoreSample = 225,
    WaveMatrix_Annotate = 226, WaveMatrix_Depth = 227, WaveMatrix_Fill = 228,
    WaveMatrix_LoadRawBuf = 229, WaveMatrix_LoadGroupShared = 230,
    WaveMatrix_StoreRawBuf = 231, WaveMatrix_StoreGroupShared = 232,
    WaveMatrix_Multiply = 233, WaveMatrix_MultiplyAccumulate = 234,
    WaveMatrix_ScalarOp = 235, WaveMatrix_SumAccumulate = 236, WaveMatrix_Add = 237,
    AllocateNodeOutputRecords = 238, GetNodeRecordPtr = 239,
    IncrementOutputCount = 240, OutputComplete = 241, GetInputRecordCount = 242,
    FinishedCrossGroupSharing = 243,
    BarrierByMemoryType = 244, BarrierByMemoryHandle = 245, BarrierByNodeRecordHandle = 246,
    CreateNodeOutputHandle = 247, IndexNodeHandle = 248, AnnotateNodeHandle = 249,
    CreateNodeInputRecordHandle = 250, AnnotateNodeRecordHandle = 251,
    NodeOutputIsValid = 252, GetRemainingRecursionLevels = 253,
    SampleCmpGrad = 254, SampleCmpBias = 255,
    StartVertexLocation = 256, StartInstanceLocation = 257,

    // Internal extensions for where there is impedance mismatch
    ExtendedDeriv, ExtendedCalculateLOD, ExtendedGetDimensions,
    ExtendedFClamp, ExtendedIClamp, ExtendedUClamp,
    ExtendedLegacyF32ToF16, ExtendedLegacyF16ToF32, ExtendedIAbs, ExtendedEvalSnapped,
    ExtendedSpirvIbfe, ExtendedSpirvUbfe, ExtendedSpirvBfi,
    ExtendedSpirvFindLSB, ExtendedSpirvIFindMSB, ExtendedSpirvUFindMSB,
    ExtendedSpirvIAddCarry, ExtendedSpirvISubBorrow,
    ExtendedSpirvSMulExtended, ExtendedSpirvUMulExtended,
    ExtendedSpirvLoadInput, ExtendedSpirvControlPointCountIn, ExtendedPow,
    Count
};

enum class AtomicBinOp : uint8_t
{
    IAdd = 0, And = 1, Or = 2, Xor = 3, IMin = 4, IMax = 5, UMin = 6,
    UMax = 7, Exchange = 8,

    // Internal extensions for custom IR
    // Load = optimized or/add of 0
    // Store = optimized exchange
    Sub = 200, Load, Store,
    Invalid = 255
};

enum class ShaderPropertyTag : uint8_t
{
    ShaderFlags = 0, GSState = 1, DSState = 2, HSState = 3, NumThreads = 4,
    AutoBindingSpace = 5, RayPayloadSize = 6, RayAttribSizeTag = 7, ShaderKind = 8,
    MSState = 9, ASState = 10, WaveSize = 11,
    NodeLaunchType = 13, NodeIsProgramEntry = 14, NodeID = 15,
    NodeLocalRootArgumentsTableIndex = 16, NodeShareInputOf = 17, NodeDispatchGrid = 18,
    NodeMaxRecursionDepth = 19, NodeInputs = 20, NodeOutputs = 21,
    NodeMaxDispatchGrid = 22, RangedWaveSize = 23
};

enum class GSStageOutTags : uint32_t
{
    Stream = 0,
    Invalid
};

enum BarrierModeBits : uint8_t
{
    SyncThreadGroup = 1 << 0,
    AccessUAVGlobal = 1 << 1,
    AccessUAVThreadGroup = 1 << 2,
    AccessGroupShared = 1 << 3
};

enum class AddressSpace : uint8_t
{
    Thread = 0,
    GroupShared = 3,
    PhysicalNodeIO = 6,
    Invalid
};

enum class InputPrimitive : uint8_t
{
    Undefined = 0,
    Point = 1,
    Line = 2,
    Triangle = 3,
    LineWithAdjacency = 6,
    TriangleWithAdjaceny = 7
};

enum class PrimitiveTopology : uint8_t
{
    Undefined = 0,
    PointList = 1,
    LineList = 2,
    LineStrip = 3,
    TriangleList = 4,
    TriangleStrip = 5
};

enum class TessellatorDomain : uint8_t
{
    Undefined = 0,
    IsoLine = 1,
    Tri = 2,
    Quad = 3
};

enum class TessellatorOutputPrimitive : uint8_t
{
    Undefined = 0,
    Point = 1,
    Line = 2,
    TriangleCW = 3,
    TriangleCCW = 4
};

enum class TessellatorPartitioning : uint8_t
{
    Undefined = 0,
    Integer = 1,
    Pow2 = 2,
    FractionalOdd = 3,
    FractionalEven = 4
};

enum class MeshOutputTopology
{
    Undefined = 0,
    Line = 1,
    Triangle = 2
};

enum ShaderFlag
{
    ShaderFlagEarlyDepthStencil = 1 << 3,
    ShaderFlagNativeLowPrecision = 1 << 23
};

enum class WaveOpKind : uint8_t
{
    Sum = 0,
    Product = 1,
    Min = 2,
    Max = 3
};

enum class WaveBitOpKind : uint8_t
{
    And = 0,
    Or = 1,
    Xor = 2
};

enum class WaveMultiPrefixOpKind : uint8_t
{
    Sum = 0,
    And = 1,
    Or = 2,
    Xor = 3,
    Product = 4
};

enum class SignedOpKind : uint8_t
{
    Signed = 0,
    Unsigned = 1
};

enum class ShaderKind
{
    Pixel = 0, Vertex, Geometry, Hull, Domain, Compute, Library,
    RayGeneration, Intersection, AnyHit, ClosestHit, Miss, Callable,
    Mesh, Amplification, Node,
    Invalid
};

enum class NodeLaunchType
{
    Invalid = 0,
    Broadcasting = 1,
    Coalescing = 2,
    Thread = 3
};

enum class NodeMetadataTag
{
    NodeOutputID = 0,
    NodeIOFlags = 1,
    NodeRecordType = 2,
    NodeMaxRecords = 3,
    NodeMaxRecordsSharedWith = 4,
    NodeOutputArraySize = 5,
    NodeAllowSparseNodes = 6
};

enum NodeIOFlagBits
{
    NodeIOInputBit = 0x1,
    NodeIOOutputBit = 0x2,
    NodeIOReadWriteBit = 0x4,
    NodeIOEmptyRecordBit = 0x8,
    NodeIONodeArrayBit = 0x10,
    NodeIOThreadRecordBit = 0x20,
    NodeIOGroupRecordBit = 0x40,
    NodeIODispatchRecordBit = 0x60,
    RecordGranularityMask = 0x60,
    NodeIOKindMask = 0x7f,
    NodeIOTrackRWInputSharingBit = 0x100,
    NodeIOGloballyCoherentBit = 0x200,
    NodeFlagsMask = 0x100,
    RecordFlagsMask = 0x200
};

enum class NodeIOKind
{
    Invalid = 0,
    EmptyInput = NodeIOEmptyRecordBit | NodeIOInputBit,
    NodeOutput = NodeIOReadWriteBit | NodeIOOutputBit,
    NodeOutputArray = NodeIOReadWriteBit | NodeIOOutputBit | NodeIONodeArrayBit,
    EmptyOutput = NodeIOEmptyRecordBit | NodeIOOutputBit,
    EmptyOutputArray = NodeIOEmptyRecordBit | NodeIOOutputBit | NodeIONodeArrayBit,

    // Records:
    DispatchNodeInputRecord = NodeIOInputBit | NodeIODispatchRecordBit,
    GroupNodeInputRecords = NodeIOInputBit | NodeIOGroupRecordBit,
    ThreadNodeInputRecord = NodeIOInputBit | NodeIOThreadRecordBit,
    RWDispatchNodeInputRecord = NodeIOReadWriteBit | NodeIOInputBit | NodeIODispatchRecordBit,
    RWGroupNodeInputRecords = NodeIOReadWriteBit | NodeIOInputBit | NodeIOGroupRecordBit,
    RWThreadNodeInputRecord = NodeIOReadWriteBit | NodeIOInputBit | NodeIOThreadRecordBit,
    GroupNodeOutputRecords = NodeIOReadWriteBit | NodeIOOutputBit | NodeIOGroupRecordBit,
    ThreadNodeOutputRecords = NodeIOReadWriteBit | NodeIOOutputBit | NodeIOThreadRecordBit
};

enum MemoryTypeFlagBits
{
    MemoryTypeUavBit = 0x1,
    MemoryTypeGroupSharedBit = 0x2,
    MemoryTypeNodeInputBit = 0x4,
    MemoryTypeNodeOutputBit = 0x8,
    MemoryTypeAllBits = 0xf
};

enum BarrierSemanticsFlagBits
{
    GroupSyncBit = 0x1,
    GroupScopeBit = 0x2,
    DeviceScopeBit = 0x4
};
} // namespace DXIL

================================================
FILE: dxil_converter.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "opcodes/converter_impl.hpp"
#include "opcodes/opcodes_dxil_builtins.hpp"
#include "opcodes/opcodes_llvm_builtins.hpp"
#include "opcodes/dxil/dxil_common.hpp"
#include "opcodes/dxil/dxil_workgraph.hpp"
#include "opcodes/dxil/dxil_geometry.hpp"
#include "dxil_converter.hpp"
#include "logging.hpp"
#include "node.hpp"
#include "node_pool.hpp"
#include "spirv_module.hpp"
#include <algorithm> // Assumed targets; the two system include names were lost in extraction.
#include <memory>    // std::find_if / std::make_unique usage below needs these.

namespace dxil_spv
{
Converter::Converter(LLVMBCParser &bitcode_parser_, LLVMBCParser *bitcode_reflection_parser_, SPIRVModule &module_)
{
    impl = std::make_unique<Impl>(bitcode_parser_, bitcode_reflection_parser_, module_);
}

Converter::~Converter()
{
}

void Converter::add_local_root_constants(uint32_t register_space, uint32_t register_index, uint32_t num_words)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Constants;
    entry.constants.num_words = num_words;
    entry.constants.register_space = register_space;
    entry.constants.register_index = register_index;
    impl->local_root_signature.push_back(entry);
}

void Converter::add_local_root_descriptor(ResourceClass type, uint32_t register_space, uint32_t register_index)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Descriptor;
    entry.descriptor.type = type;
    entry.descriptor.register_space = register_space;
    entry.descriptor.register_index = register_index;
    impl->local_root_signature.push_back(entry);
}

void Converter::add_local_root_descriptor_table(Vector<DescriptorTableEntry> entries)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Table;
    entry.table_entries = std::move(entries);
    impl->local_root_signature.push_back(std::move(entry));
}

void Converter::add_local_root_descriptor_table(const DescriptorTableEntry *entries, size_t count)
{
    add_local_root_descriptor_table({ entries, entries + count });
}

uint32_t Converter::get_patch_location_offset() const
{
    return impl->patch_location_offset;
}

void Converter::set_patch_location_offset(uint32_t offset)
{
    impl->patch_location_offset = offset;
}

void Converter::get_workgroup_dimensions(uint32_t &x, uint32_t &y, uint32_t &z) const
{
    x = impl->execution_mode_meta.workgroup_threads[0];
    y = impl->execution_mode_meta.workgroup_threads[1];
    z = impl->execution_mode_meta.workgroup_threads[2];
}

uint32_t Converter::get_patch_vertex_count() const
{
    return impl->execution_mode_meta.stage_input_num_vertex;
}

void Converter::get_compute_wave_size_range(uint32_t &min, uint32_t &max, uint32_t &preferred) const
{
    min = impl->execution_mode_meta.wave_size_min;
    max = impl->execution_mode_meta.wave_size_max;
    preferred = impl->execution_mode_meta.wave_size_preferred;
}

uint32_t Converter::get_compute_heuristic_max_wave_size() const
{
    if (impl->execution_mode_meta.wave_size_min)
        return 0;
    return impl->execution_mode_meta.heuristic_max_wave_size;
}

uint32_t Converter::get_compute_heuristic_min_wave_size() const
{
    if (impl->execution_mode_meta.wave_size_min)
        return 0;
    return impl->execution_mode_meta.heuristic_min_wave_size;
}

bool Converter::is_multiview_compatible() const
{
    // We're not multiview compatible if ViewIndex does not correspond 1:1 with output layer index.
    // ViewIndex is limited, and if the constant Layer offset is too large, it may force "slow" path
    // with draw-level instancing.
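    // Concretely (see the return expression below): multiview must be enabled,
    // the shader must not compute its own layer index, and a valid spec constant
    // mapping ViewIndex to the view instance must have been provided.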
    return impl->options.multiview.enable && !impl->multiview.custom_layer_index &&
           impl->options.multiview.view_index_to_view_instance_spec_id != UINT32_MAX;
}

bool Converter::shader_requires_feature(ShaderFeature feature) const
{
    switch (feature)
    {
    case ShaderFeature::Native16BitOperations:
        return impl->builder().hasCapability(spv::CapabilityFloat16) ||
               impl->builder().hasCapability(spv::CapabilityInt16);
    default:
        return false;
    }
}

bool Converter::get_driver_version(uint32_t &driver_id, uint32_t &driver_version) const
{
    if (impl->options.driver_version == 0)
        return false;
    driver_id = impl->options.driver_id;
    driver_version = impl->options.driver_version;
    return true;
}

ConvertedFunction Converter::convert_entry_point()
{
    return impl->convert_entry_point();
}

template <typename T>
static T get_constant_metadata(const llvm::MDNode *node, unsigned index)
{
    return T(llvm::cast<llvm::ConstantAsMetadata>(node->getOperand(index))->getValue()->getUniqueInteger().getSExtValue());
}

static String get_string_metadata(const llvm::MDNode *node, unsigned index)
{
#ifdef HAVE_LLVMBC
    return llvm::cast<llvm::MDString>(node->getOperand(index))->getString();
#else
    std::string tmp = llvm::cast<llvm::MDString>(node->getOperand(index))->getString();
    String str(tmp.begin(), tmp.end());
    return str;
#endif
}

static String get_resource_name_metadata(const llvm::MDNode *node, const llvm::MDNode *reflections)
{
    if (reflections)
    {
        unsigned bind_space = get_constant_metadata<unsigned>(node, 3);
        unsigned bind_register = get_constant_metadata<unsigned>(node, 4);
        unsigned num_operands = reflections->getNumOperands();
        for (unsigned i = 0; i < num_operands; i++)
        {
            auto *refl_node = llvm::cast<llvm::MDNode>(reflections->getOperand(i));
            if (get_constant_metadata<unsigned>(refl_node, 3) == bind_space &&
                get_constant_metadata<unsigned>(refl_node, 4) == bind_register)
            {
                return get_string_metadata(refl_node, 2);
            }
        }
    }
    return get_string_metadata(node, 2);
}

static spv::Dim image_dimension_from_resource_kind(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture1D:
    case DXIL::ResourceKind::Texture1DArray:
        return spv::Dim1D;
    case DXIL::ResourceKind::Texture2D:
    case DXIL::ResourceKind::Texture2DMS:
    case DXIL::ResourceKind::Texture2DArray:
    case DXIL::ResourceKind::Texture2DMSArray:
    case DXIL::ResourceKind::FeedbackTexture2D:
    case DXIL::ResourceKind::FeedbackTexture2DArray:
        return spv::Dim2D;
    case DXIL::ResourceKind::Texture3D:
        return spv::Dim3D;
    case DXIL::ResourceKind::TextureCube:
    case DXIL::ResourceKind::TextureCubeArray:
        return spv::DimCube;
    case DXIL::ResourceKind::TypedBuffer:
    case DXIL::ResourceKind::StructuredBuffer:
    case DXIL::ResourceKind::RawBuffer:
        return spv::DimBuffer;
    default:
        return spv::DimMax;
    }
}

static bool image_dimension_is_arrayed(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture1DArray:
    case DXIL::ResourceKind::Texture2DArray:
    case DXIL::ResourceKind::Texture2DMSArray:
    case DXIL::ResourceKind::TextureCubeArray:
    case DXIL::ResourceKind::FeedbackTexture2DArray:
        return true;
    default:
        return false;
    }
}

static bool image_dimension_is_multisampled(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture2DMS:
    case DXIL::ResourceKind::Texture2DMSArray:
        return true;
    default:
        return false;
    }
}

static DXIL::ComponentType convert_16bit_component_to_32bit(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::F16:
        return DXIL::ComponentType::F32;
    case DXIL::ComponentType::I16:
        return DXIL::ComponentType::I32;
    case DXIL::ComponentType::U16:
        return DXIL::ComponentType::U32;
    default:
        return type;
    }
}
static DXIL::ComponentType convert_component_to_unsigned(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::I16:
        return DXIL::ComponentType::U16;
    case DXIL::ComponentType::I32:
        return DXIL::ComponentType::U32;
    case DXIL::ComponentType::I64:
        return DXIL::ComponentType::U64;
    default:
        return type;
    }
}

static DXIL::ComponentType normalize_component_type(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::UNormF16:
    case DXIL::ComponentType::SNormF16:
        return DXIL::ComponentType::F16;
    case DXIL::ComponentType::UNormF32:
    case DXIL::ComponentType::SNormF32:
        return DXIL::ComponentType::F32;
    case DXIL::ComponentType::UNormF64:
    case DXIL::ComponentType::SNormF64:
        return DXIL::ComponentType::F64;
    default:
        return type;
    }
}

static spv::Id build_ssbo_runtime_array_type(Converter::Impl &impl, RawType type, unsigned bits, unsigned vecsize,
                                             unsigned range_size, const String &name)
{
    auto &builder = impl.builder();
    spv::Id value_type = type == RawType::Integer ? builder.makeUintType(bits) : builder.makeFloatType(bits);
    if (vecsize > 1)
        value_type = builder.makeVectorType(value_type, vecsize);

    spv::Id element_array_type = builder.makeRuntimeArray(value_type);
    builder.addDecoration(element_array_type, spv::DecorationArrayStride, vecsize * (bits / 8));

    spv::Id block_type_id = impl.get_struct_type({ element_array_type }, 0, name.c_str());
    builder.addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
    builder.addDecoration(block_type_id, spv::DecorationBlock);

    spv::Id type_id = block_type_id;
    if (range_size != 1)
    {
        assert(range_size != 0);
        if (range_size == ~0u)
            type_id = builder.makeRuntimeArray(type_id);
        else
            type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
    }
    return type_id;
}

Vector<RawDeclarationVariable> Converter::Impl::create_bindless_heap_variable_alias_group(
    const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls)
{
    Vector<RawDeclarationVariable> decls;
    decls.reserve(raw_decls.size());

    for (auto &decl : raw_decls)
    {
        RawDeclarationVariable var = {};
        var.declaration = decl;
        auto info = base_info;
        info.component = raw_width_to_component_type(decl.type, decl.width);
        info.raw_vecsize = decl.vecsize;
        var.var_id = create_bindless_heap_variable(info);
        decls.push_back(var);
    }

    return decls;
}

spv::Id Converter::Impl::create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size,
                                             const String &name, unsigned cbv_size)
{
    auto &builder = spirv_module.get_builder();

    unsigned element_size = raw_width_to_bits(raw_decl.width) * raw_vecsize_to_vecsize(raw_decl.vecsize) / 8;
    unsigned array_length = (cbv_size + element_size - 1) / element_size;

    // It seems like we will have to bitcast ourselves away from vec4 here after loading.
    spv::Id size_id = builder.makeUintConstant(array_length, false);

    unsigned bits = raw_width_to_bits(raw_decl.width);
    spv::Id element_type = raw_decl.type == RawType::Float ? builder.makeFloatType(bits) : builder.makeUintType(bits);
    if (raw_decl.vecsize != RawVecSize::V1)
        element_type = builder.makeVectorType(element_type, raw_vecsize_to_vecsize(raw_decl.vecsize));

    spv::Id member_array_type = builder.makeArrayType(element_type, size_id, element_size);
    builder.addDecoration(member_array_type, spv::DecorationArrayStride, element_size);
"" : (name + "UBO"); spv::Id type_id = get_struct_type({ member_array_type }, 0, ubo_block_name.c_str()); builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0); builder.addDecoration(type_id, spv::DecorationBlock); if (range_size != 1) { if (range_size == ~0u) type_id = builder.makeRuntimeArray(type_id); else type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0); } if (raw_decl.width == RawWidth::B16) builder.addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess); else if (raw_decl.width == RawWidth::B8) { builder.addExtension("SPV_KHR_8bit_storage"); builder.addCapability(spv::CapabilityUniformAndStorageBuffer8BitAccess); } return create_variable(spv::StorageClassUniform, type_id, name.empty() ? nullptr : name.c_str()); } spv::Id Converter::Impl::create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name) { spv::Id type_id = build_ssbo_runtime_array_type(*this, raw_decl.type, raw_width_to_bits(raw_decl.width), raw_vecsize_to_vecsize(raw_decl.vecsize), range_size, name + "SSBO"); if (raw_decl.width == RawWidth::B16) builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); else if (raw_decl.width == RawWidth::B8) { builder().addExtension("SPV_KHR_8bit_storage"); builder().addCapability(spv::CapabilityStorageBuffer8BitAccess); } return create_variable(spv::StorageClassStorageBuffer, type_id, name.empty() ? nullptr : name.c_str()); } Vector Converter::Impl::create_raw_ssbo_variable_alias_group( const Vector &raw_decls, uint32_t range_size, const String &name) { Vector group; group.reserve(raw_decls.size()); for (auto &decl : raw_decls) group.push_back({ decl, create_raw_ssbo_variable(decl, range_size, name) }); return group; } Vector Converter::Impl::create_ubo_variable_alias_group( const Vector &raw_decls, uint32_t range_size, const String &name, unsigned cbv_size) { Vector group; group.reserve(raw_decls.size()); for (auto &decl : raw_decls) group.push_back({ decl, create_ubo_variable(decl, range_size, name, cbv_size) }); return group; } static const char *convert_component_type_to_str(DXIL::ComponentType type) { switch (type) { case DXIL::ComponentType::U16: return "U16"; case DXIL::ComponentType::U32: return "U32"; case DXIL::ComponentType::U64: return "U64"; case DXIL::ComponentType::I16: return "I16"; case DXIL::ComponentType::I32: return "I32"; case DXIL::ComponentType::I64: return "I64"; case DXIL::ComponentType::F16: return "F16"; case DXIL::ComponentType::F32: return "F32"; case DXIL::ComponentType::F64: return "F64"; default: return ""; } } spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) { auto itr = std::find_if(bindless_resources.begin(), bindless_resources.end(), [&](const BindlessResource &resource) { return resource.info.type == info.type && resource.info.component == info.component && resource.info.raw_vecsize == info.raw_vecsize && resource.info.kind == info.kind && resource.info.desc_set == info.desc_set && resource.info.format == info.format && resource.info.binding == info.binding && resource.info.uav_read == info.uav_read && resource.info.uav_written == info.uav_written && resource.info.uav_coherent == info.uav_coherent && resource.info.relaxed_precision == info.relaxed_precision && resource.info.aliased == info.aliased && resource.info.counters == info.counters && resource.info.offsets == info.offsets && resource.info.descriptor_type == info.descriptor_type && (!options.extended_non_semantic_info || resource.info.debug.stride == 
               (!options.extended_non_semantic_info || resource.info.debug.stride == info.debug.stride);
    });

    if (itr != bindless_resources.end())
    {
        return itr->var_id;
    }
    else
    {
        BindlessResource resource = {};
        resource.info = info;

        spv::Id type_id = 0;
        auto storage = spv::StorageClassMax;

        switch (info.type)
        {
        case DXIL::ResourceType::SRV:
        {
            if (info.kind == DXIL::ResourceKind::RTAccelerationStructure)
            {
                if (info.descriptor_type == VulkanDescriptorType::SSBO)
                {
                    type_id = build_ssbo_runtime_array_type(*this, RawType::Integer, 32, 2, 1, "RTASHeap");
                    storage = spv::StorageClassStorageBuffer;
                }
                else
                {
                    type_id = builder().makeAccelerationStructureType();
                    type_id = builder().makeRuntimeArray(type_id);
                    storage = spv::StorageClassUniformConstant;
                }
            }
            else if (info.descriptor_type == VulkanDescriptorType::SSBO)
            {
                RawType raw_type = raw_component_type_to_type(info.component);
                unsigned bits = raw_component_type_to_bits(info.component);

                if (info.offsets)
                    type_id = build_ssbo_runtime_array_type(*this, raw_type, 32, 2, 1, "SSBO_Offsets");
                else
                    type_id = build_ssbo_runtime_array_type(*this, raw_type, bits,
                                                            raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO");
                storage = spv::StorageClassStorageBuffer;

                if (bits == 16)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else if (bits == 8)
                {
                    builder().addExtension("SPV_KHR_8bit_storage");
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                }
            }
            else
            {
                if (info.component != DXIL::ComponentType::U32 && info.component != DXIL::ComponentType::I32 &&
                    info.component != DXIL::ComponentType::F32)
                {
                    LOGE("Invalid component type for image.\n");
                    return 0;
                }

                spv::Id sampled_type_id = get_type_id(info.component, 1, 1);
                type_id = builder().makeImageType(sampled_type_id, image_dimension_from_resource_kind(info.kind), false,
                                                  image_dimension_is_arrayed(info.kind),
                                                  image_dimension_is_multisampled(info.kind), 1, spv::ImageFormatUnknown);
                type_id = builder().makeRuntimeArray(type_id);
                storage = spv::StorageClassUniformConstant;
            }
            break;
        }

        case DXIL::ResourceType::UAV:
        {
            if (info.counters)
            {
                if (info.kind == DXIL::ResourceKind::Invalid)
                {
                    auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::RawDescriptorHeapView)];
                    if (mapping.kind == MetaDescriptorKind::UBOContainingBDA)
                    {
                        // This is faster access than the normal SSBO descriptor path.
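                        // (Presumably because a fixed UBO slot holding the heap's GPU
                        // address is cheaper to load than indexing a runtime-sized
                        // SSBO array just to reach the introspection buffer.)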
                        if (info.desc_set != mapping.desc_set || info.binding != mapping.desc_binding)
                            LOGW("Using meta CBV mapping for physical descriptors, but there is a mismatch in requested bindings.\n");
                        if (!emit_descriptor_heap_introspection_buffer())
                            return 0;
                        return instrumentation.descriptor_heap_introspection_var_id;
                    }
                    else
                    {
                        spv::Id uint_type = builder().makeUintType(32);
                        spv::Id uvec2_type = builder().makeVectorType(uint_type, 2);
                        spv::Id runtime_array_type_id = builder().makeRuntimeArray(uvec2_type);
                        builder().addDecoration(runtime_array_type_id, spv::DecorationArrayStride, sizeof(uint64_t));
                        type_id = get_struct_type({ runtime_array_type_id }, 0, "AtomicCounters");
                        builder().addDecoration(type_id, spv::DecorationBlock);
                        builder().addMemberName(type_id, 0, "counters");
                        builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
                        builder().addMemberDecoration(type_id, 0, spv::DecorationNonWritable);
                    }
                }
                else
                {
                    spv::Id uint_type = builder().makeUintType(32);
                    type_id = get_struct_type({ uint_type }, 0, "AtomicCounters");
                    builder().addDecoration(type_id, spv::DecorationBlock);
                    builder().addMemberName(type_id, 0, "counter");
                    builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
                    type_id = builder().makeRuntimeArray(type_id);
                }
                storage = spv::StorageClassStorageBuffer;
            }
            else if (info.descriptor_type == VulkanDescriptorType::SSBO)
            {
                RawType raw_type = raw_component_type_to_type(info.component);
                unsigned bits = raw_component_type_to_bits(info.component);
                type_id = build_ssbo_runtime_array_type(*this, raw_type, bits,
                                                        raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO");
                storage = spv::StorageClassStorageBuffer;

                if (bits == 16)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else if (bits == 8)
                {
                    builder().addExtension("SPV_KHR_8bit_storage");
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                }
            }
            else
            {
                if (info.component != DXIL::ComponentType::U32 && info.component != DXIL::ComponentType::I32 &&
                    info.component != DXIL::ComponentType::F32 && info.component != DXIL::ComponentType::U64)
                {
                    LOGE("Invalid component type for image.\n");
                    return 0;
                }

                spv::Id sampled_type_id = get_type_id(info.component, 1, 1);
                type_id = builder().makeImageType(sampled_type_id, image_dimension_from_resource_kind(info.kind), false,
                                                  image_dimension_is_arrayed(info.kind),
                                                  image_dimension_is_multisampled(info.kind), 2, info.format);
                type_id = builder().makeRuntimeArray(type_id);
                storage = spv::StorageClassUniformConstant;
            }
            break;
        }

        case DXIL::ResourceType::Sampler:
            type_id = builder().makeSamplerType();
            type_id = builder().makeRuntimeArray(type_id);
            storage = spv::StorageClassUniformConstant;
            break;

        case DXIL::ResourceType::CBV:
        {
            RawType raw_type = raw_component_type_to_type(info.component);
            unsigned bits = raw_component_type_to_bits(info.component);
            unsigned vecsize = raw_vecsize_to_vecsize(info.raw_vecsize);
            type_id = raw_type == RawType::Float ? builder().makeFloatType(bits) : builder().makeUintType(bits);
            if (vecsize > 1)
                type_id = builder().makeVectorType(type_id, vecsize);

            unsigned element_size = (bits / 8) * vecsize;
            unsigned num_elements = 0x10000 / element_size;

            type_id = builder().makeArrayType(type_id, builder().makeUintConstant(num_elements), element_size);
            builder().addDecoration(type_id, spv::DecorationArrayStride, element_size);
            type_id = get_struct_type({ type_id }, 0, "BindlessCBV");
            builder().addDecoration(type_id, spv::DecorationBlock);

            if (options.bindless_cbv_ssbo_emulation)
                builder().addMemberDecoration(type_id, 0, spv::DecorationNonWritable);
            builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
            type_id = builder().makeRuntimeArray(type_id);
            storage = options.bindless_cbv_ssbo_emulation ? spv::StorageClassStorageBuffer : spv::StorageClassUniform;

            if (bits == 16)
            {
                if (options.bindless_cbv_ssbo_emulation)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else
                    builder().addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess);
            }
            else if (bits == 8)
            {
                builder().addExtension("SPV_KHR_8bit_storage");
                if (options.bindless_cbv_ssbo_emulation)
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                else
                    builder().addCapability(spv::CapabilityUniformAndStorageBuffer8BitAccess);
            }
            break;
        }

        default:
            return 0;
        }

        builder().addExtension("SPV_EXT_descriptor_indexing");
        builder().addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
        resource.var_id = create_variable(storage, type_id);

        if (options.extended_non_semantic_info)
        {
            String name;
            switch (info.type)
            {
            case DXIL::ResourceType::SRV: name = "SRV"; break;
            case DXIL::ResourceType::UAV: name = "UAV"; break;
            case DXIL::ResourceType::CBV: name = "CBV"; break;
            case DXIL::ResourceType::Sampler: name = "Sampler"; break;
            default: break;
            }

            const char *component_type_name = convert_component_type_to_str(info.component);

            switch (info.kind)
            {
            case DXIL::ResourceKind::RawBuffer:
                name += "_ByteAddressBuffer";
                name += "_vec";
                name += std::to_string(raw_vecsize_to_vecsize(info.raw_vecsize)).c_str();
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::StructuredBuffer:
                name += "_StructuredBuffer_";
                name += std::to_string(info.debug.stride).c_str();
                name += "_vec";
                name += std::to_string(raw_vecsize_to_vecsize(info.raw_vecsize)).c_str();
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::CBuffer:
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::TypedBuffer: name += "_TypedBuffer_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture1D: name += "_1D_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture1DArray: name += "_1DArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2D: name += "_2D_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DArray: name += "_2DArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DMS: name += "_2DMS_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DMSArray: name += "_2DMSArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::TextureCube: name += "_Cube_"; name += component_type_name; break;
            case DXIL::ResourceKind::TextureCubeArray: name += "_CubeArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture3D:
                name += "_3D_";
                name += component_type_name;
                break;
            default:
                break;
            }

            builder().addName(resource.var_id, name.c_str());
        }

        auto &meta = handle_to_resource_meta[resource.var_id];
        meta = {};
        meta.kind = info.kind;
        meta.component_type = info.component;
        meta.raw_component_vecsize = info.raw_vecsize;
        meta.var_id = resource.var_id;
        meta.storage = storage;

        builder().addDecoration(resource.var_id, spv::DecorationDescriptorSet, info.desc_set);
        builder().addDecoration(resource.var_id, spv::DecorationBinding, info.binding);

        if (info.relaxed_precision)
        {
            builder().addDecoration(resource.var_id, spv::DecorationRelaxedPrecision);

            // Signal the intended component type.
            switch (meta.component_type)
            {
            case DXIL::ComponentType::F32:
                meta.component_type = DXIL::ComponentType::F16;
                break;
            case DXIL::ComponentType::I32:
                meta.component_type = DXIL::ComponentType::I16;
                break;
            case DXIL::ComponentType::U32:
                meta.component_type = DXIL::ComponentType::U16;
                break;
            default:
                break;
            }
        }

        if (info.type == DXIL::ResourceType::UAV && !info.counters)
        {
            if (!info.uav_read)
                builder().addDecoration(resource.var_id, spv::DecorationNonReadable);
            if (!info.uav_written)
                builder().addDecoration(resource.var_id, spv::DecorationNonWritable);
            if (info.uav_coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
                builder().addDecoration(resource.var_id, spv::DecorationCoherent);
        }
        else if (info.counters && info.kind == DXIL::ResourceKind::Invalid)
        {
            builder().addDecoration(resource.var_id, spv::DecorationAliasedPointer);
        }
        else if (info.type == DXIL::ResourceType::SRV && info.descriptor_type == VulkanDescriptorType::SSBO)
        {
            builder().addDecoration(resource.var_id, spv::DecorationNonWritable);
            builder().addDecoration(resource.var_id, spv::DecorationRestrict);
        }

        // The default in Vulkan environment is Restrict.
        if (info.aliased && info.type == DXIL::ResourceType::UAV)
            builder().addDecoration(resource.var_id, spv::DecorationAliased);

        bindless_resources.push_back(resource);
        return resource.var_id;
    }
}

Converter::Impl::ResourceVariableMeta Converter::Impl::get_resource_variable_meta(const llvm::MDNode *resource) const
{
    ResourceVariableMeta meta = {};
    if (!resource)
        return meta;

    if (const auto *variable = llvm::dyn_cast<llvm::ValueAsMetadata>(resource->getOperand(1)))
    {
        const llvm::Value *val = variable->getValue();
        const auto *global = llvm::dyn_cast<llvm::GlobalVariable>(val);

        // It's possible that the variable is a constexpr bitcast, so resolve those ...
        while (!global && val)
        {
            auto *constexpr_op = llvm::dyn_cast<llvm::ConstantExpr>(val);
            val = nullptr;
            if (constexpr_op && constexpr_op->getOpcode() == llvm::UnaryOperator::BitCast)
            {
                val = constexpr_op->getOperand(0);
                global = llvm::dyn_cast<llvm::GlobalVariable>(val);
            }
        }

        if (global)
        {
            meta.is_lib_variable = true;
            meta.is_active = llvm_active_global_resource_variables.count(global) != 0;
            return meta;
        }
    }

    meta.is_active = true;
    return meta;
}

void Converter::Impl::register_resource_meta_reference(const llvm::MDOperand &operand, DXIL::ResourceType type, unsigned index)
{
    // In RT shaders, apps will load dummy structs from global variables.
    // Here we get the chance to redirect them towards the resource meta declaration.
    if (operand)
    {
        auto *value = llvm::cast<llvm::ValueAsMetadata>(operand)->getValue();

        // In lib_6_6, this is somehow a bitcasted pointer expression, sigh ...
        // Drill deep until we actually find the original resource.
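        // (A minimal unwrap loop: peel ConstantExpr bitcasts one layer at a time
        // until the underlying GlobalVariable emerges, or bail on anything else.)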
        while (auto *cexpr = llvm::dyn_cast<llvm::ConstantExpr>(value))
        {
            if (cexpr->getOpcode() == llvm::Instruction::BitCast)
                value = cexpr->getOperand(0);
            else
                break;
        }

        auto *global_variable = llvm::dyn_cast<llvm::GlobalVariable>(value);
        if (global_variable)
            llvm_global_variable_to_resource_mapping[global_variable] = { type, index, nullptr, global_variable, false };
    }
}

bool Converter::Impl::emit_resources_global_mapping(DXIL::ResourceType type, const llvm::MDNode *node)
{
    unsigned num_resources = node->getNumOperands();
    for (unsigned i = 0; i < num_resources; i++)
    {
        auto *resource = llvm::cast<llvm::MDNode>(node->getOperand(i));
        unsigned index = get_constant_metadata<unsigned>(resource, 0);

        if (type == DXIL::ResourceType::UAV)
        {
            unsigned bind_space = get_constant_metadata<unsigned>(resource, 3);
            unsigned bind_register = get_constant_metadata<unsigned>(resource, 4);
            auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata<unsigned>(resource, 6));

            if (bind_space == AgsUAVMagicRegisterSpace && resource_kind == DXIL::ResourceKind::RawBuffer)
            {
                ags.uav_magic_resource_type_index = index;
            }
            else if (options.nvapi.enabled && options.nvapi.register_index == bind_register &&
                     options.nvapi.register_space == bind_space &&
                     resource_kind == DXIL::ResourceKind::StructuredBuffer)
            {
                nvapi.uav_magic_resource_type_index = index;
            }
        }

        register_resource_meta_reference(resource->getOperand(1), type, index);
    }
    return true;
}

spv::Id Converter::Impl::get_physical_pointer_block_type(spv::Id base_type_id, const PhysicalPointerMeta &meta)
{
    auto itr = std::find_if(physical_pointer_entries.begin(), physical_pointer_entries.end(),
                            [&](const PhysicalPointerEntry &entry) {
                                return entry.meta.coherent == meta.coherent &&
                                       entry.meta.nonreadable == meta.nonreadable &&
                                       entry.meta.nonwritable == meta.nonwritable &&
                                       entry.meta.size == meta.size && entry.meta.stride == meta.stride &&
                                       entry.base_type_id == base_type_id;
                            });
    if (itr != physical_pointer_entries.end())
        return itr->ptr_type_id;

    int vecsize = builder().getNumTypeComponents(base_type_id);
    int width = builder().getScalarTypeWidth(base_type_id);
    spv::Op op = builder().getTypeClass(base_type_id);
    if (op == spv::OpTypeVector)
        op = builder().getTypeClass(builder().getScalarTypeId(base_type_id));

    String type = "PhysicalPointer";

    switch (op)
    {
    case spv::OpTypeFloat:
        if (width == 16)
            type += "Half";
        else if (width == 32)
            type += "Float";
        else if (width == 64)
            type += "Double";
        break;

    case spv::OpTypeInt:
        if (width == 16)
            type += "Ushort";
        else if (width == 32)
            type += "Uint";
        else if (width == 64)
            type += "Uint64";
        break;

    default:
        break;
    }

    if (vecsize > 1)
        type += std::to_string(vecsize).c_str();
    if (meta.nonwritable)
        type += "NonWrite";
    if (meta.nonreadable)
        type += "NonRead";
    if (meta.coherent)
        type += "Coherent";

    spv::Id type_id = base_type_id;
    if (meta.stride > 0)
    {
        if (meta.size == 0)
        {
            type_id = builder().makeRuntimeArray(type_id);
            type += "Array";
        }
        else
        {
            type_id = builder().makeArrayType(type_id, builder().makeUintConstant(meta.size / meta.stride), meta.stride);
            type += "CBVArray";
        }
        builder().addDecoration(type_id, spv::DecorationArrayStride, meta.stride);
    }

    spv::Id block_type_id = builder().makeStructType({ type_id }, type.c_str());
    builder().addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
    builder().addMemberName(block_type_id, 0, "value");
    builder().addDecoration(block_type_id, spv::DecorationBlock);
    if (meta.nonwritable)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonWritable);
    if (meta.nonreadable)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonReadable);
    if (meta.coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationCoherent);

    spv::Id ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, block_type_id);

    PhysicalPointerEntry new_entry = {};
    new_entry.ptr_type_id = ptr_type_id;
    new_entry.base_type_id = base_type_id;
    new_entry.meta = meta;
    physical_pointer_entries.push_back(new_entry);
    return ptr_type_id;
}

static bool component_type_is_16bit(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::F16:
    case DXIL::ComponentType::I16:
    case DXIL::ComponentType::U16:
        return true;
    default:
        return false;
    }
}

bool Converter::Impl::analyze_aliased_access(const AccessTracking &tracking, VulkanDescriptorType descriptor_type,
                                             AliasedAccess &aliased_access) const
{
    bool raw_access_16bit = false;
    bool raw_access_64bit = false;

    for (int type_ = 0; type_ < int(RawType::Count); type_++)
    {
        for (int width_ = 0; width_ < int(RawWidth::Count); width_++)
        {
            auto width = RawWidth(width_);
            if (width == RawWidth::B16 && !execution_mode_meta.native_16bit_operations)
                continue;

            for (int vecsize_ = 0; vecsize_ < int(RawVecSize::Count); vecsize_++)
            {
                auto vecsize = RawVecSize(vecsize_);
                auto type = RawType(type_);

                // Non-native 16-bit SSBOs are declared as 32-bit, so avoid false aliases.
                bool has_decl = tracking.raw_access_buffer_declarations[type_][width_][vecsize_];
                if (!has_decl && RawWidth(width) == RawWidth::B32 && !execution_mode_meta.native_16bit_operations)
                    has_decl = tracking.raw_access_buffer_declarations[type_][unsigned(RawWidth::B16)][vecsize_];

                if (has_decl)
                {
                    if (width == RawWidth::B16)
                        raw_access_16bit = true;
                    else if (width == RawWidth::B64)
                        raw_access_64bit = true;
                    aliased_access.raw_declarations.push_back({ type, width, vecsize });
                    aliased_access.primary_component_type = raw_width_to_component_type(type, width);
                    aliased_access.primary_raw_vecsize = vecsize;
                }
            }
        }
    }

    if (raw_access_16bit && descriptor_type != VulkanDescriptorType::SSBO &&
        descriptor_type != VulkanDescriptorType::UBO &&
        descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
    {
        LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
        return false;
    }

    if (raw_access_64bit && descriptor_type != VulkanDescriptorType::SSBO &&
        descriptor_type != VulkanDescriptorType::UBO &&
        descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
    {
        LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
        return false;
    }

    // Only SSBO and UBO can be redeclared with different types.
    // Typed descriptors are always scalar.
    aliased_access.requires_alias_decoration =
        (descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() > 1;

    // If we only emit one 16-bit or 64-bit SSBO/UBO, we need to override the component type of that meta declaration.
    aliased_access.override_primary_component_types =
        (descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() == 1;

    // If the SSBO is never actually accessed (UAV counters for example), fudge the default type.
    if (descriptor_type == VulkanDescriptorType::SSBO && aliased_access.raw_declarations.empty())
        aliased_access.raw_declarations.push_back({ RawType::Integer, RawWidth::B32, RawVecSize::V1 });

    // If the CBV is never actually accessed, fudge the default legacy CBV type.
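    // (Legacy cbuffers are laid out as 16-byte float4 rows, hence the f32 / vec4 default below.)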
    if (descriptor_type == VulkanDescriptorType::UBO && aliased_access.raw_declarations.empty())
        aliased_access.raw_declarations.push_back({ RawType::Float, RawWidth::B32, RawVecSize::V4 });

    // Safeguard against unused variables where we never end up setting any primary component type.
    if ((descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() == 1)
    {
        aliased_access.primary_component_type = raw_width_to_component_type(
            aliased_access.raw_declarations.front().type, aliased_access.raw_declarations.front().width);
        aliased_access.primary_raw_vecsize = aliased_access.raw_declarations.front().vecsize;
        aliased_access.override_primary_component_types = true;
    }

    return true;
}

void Converter::Impl::emit_non_semantic_debug_info(const NonSemanticDebugInfo &info)
{
    auto &b = spirv_module.get_builder();
    b.addExtension("SPV_KHR_non_semantic_info");
    spv::Id ext = b.import("NonSemantic.dxil-spirv.signature");

    auto *u8_data = static_cast<const uint8_t *>(info.data);

    // If the root sig is massive (likely because it came from a full DXIL blob or something),
    // need to dump in multiple stages due to opcode limits.
    for (size_t i = 0; i < info.size; i += 64 * 1024)
    {
        size_t to_dump = std::min<size_t>(info.size - i, 64 * 1024);

        auto inst = std::make_unique<spv::Instruction>(b.getUniqueId(), b.makeVoidType(), spv::OpExtInst);
        inst->addIdOperand(ext);
        inst->addImmediateOperand(1);
        inst->addIdOperand(b.addString(info.tag));

        // Emit the bulk as packed 32-bit words, then the unaligned tail byte by byte.
        for (size_t j = 0; j < (to_dump & ~size_t(3)); j += 4)
        {
            uint32_t v;
            memcpy(&v, u8_data + i + j, sizeof(v));
            inst->addIdOperand(b.makeUintConstant(v));
        }

        for (size_t j = to_dump & ~size_t(3); j < to_dump; j++)
            inst->addIdOperand(b.makeUint8Constant(u8_data[i + j]));

        b.addExternal(std::move(inst));
    }
}

void Converter::Impl::emit_root_parameter_index_from_push_index(const char *tag, uint32_t index, uint32_t size, bool bda)
{
    bool descriptor_packing = (index & 0x80000000) != 0;
    uint32_t parameter_index = UINT32_MAX;
    uint32_t effective_offset = 0;

    if (descriptor_packing)
    {
        for (auto &mapping : root_parameter_mappings)
        {
            if (mapping.offset == index)
            {
                parameter_index = mapping.root_parameter_index;
                break;
            }
        }
    }
    else
    {
        effective_offset = bda ? index * 8 : (index * 4 + root_descriptor_count * 8);
        for (auto &mapping : root_parameter_mappings)
        {
            if (mapping.offset == effective_offset)
            {
                parameter_index = mapping.root_parameter_index;
                break;
            }
        }
    }

    if (parameter_index == UINT32_MAX)
        return;

    // Avoid lots of spam.
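    // (Each root parameter index is reported at most once, tracked by a 64-bit emit mask.)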
    if ((1ull << parameter_index) & root_parameter_emit_mask)
        return;
    root_parameter_emit_mask |= 1ull << parameter_index;

    auto &b = spirv_module.get_builder();
    b.addExtension("SPV_KHR_non_semantic_info");
    spv::Id ext = b.import("NonSemantic.dxil-spirv.signature");

    auto inst = std::make_unique<spv::Instruction>(b.getUniqueId(), b.makeVoidType(), spv::OpExtInst);
    inst->addIdOperand(ext);
    inst->addImmediateOperand(0);
    inst->addIdOperand(b.addString(tag));
    inst->addIdOperand(b.makeUintConstant(parameter_index));

    if (descriptor_packing)
    {
        inst->addIdOperand(b.makeUintConstant((index >> 24) & 0x7f));
        inst->addIdOperand(b.makeUintConstant(index & 0xffffff));
    }
    else
    {
        inst->addIdOperand(b.makeUintConstant(effective_offset));
        inst->addIdOperand(b.makeUintConstant(size));
    }

    b.addExternal(std::move(inst));
}

bool Converter::Impl::emit_srvs(const llvm::MDNode *srvs, const llvm::MDNode *refl)
{
    auto &builder = spirv_module.get_builder();
    unsigned num_srvs = srvs->getNumOperands();

    for (unsigned i = 0; i < num_srvs; i++)
    {
        auto *srv = llvm::cast<llvm::MDNode>(srvs->getOperand(i));
        auto var_meta = get_resource_variable_meta(srv);
        if (!var_meta.is_active)
            continue;

        unsigned index = get_constant_metadata<unsigned>(srv, 0);
        auto name = get_resource_name_metadata(srv, refl);
        unsigned bind_space = get_constant_metadata<unsigned>(srv, 3);
        unsigned bind_register = get_constant_metadata<unsigned>(srv, 4);
        unsigned range_size = get_constant_metadata<unsigned>(srv, 5);

        if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
        {
            // This seems to be possible in RT shaders when explicit register() is missing?
            LOGE("Nonsensical SRV binding detected.\n");
            return false;
        }

        auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata<unsigned>(srv, 6));

        llvm::MDNode *tags = nullptr;
        if (srv->getNumOperands() >= 9 && srv->getOperand(8))
            tags = llvm::dyn_cast<llvm::MDNode>(srv->getOperand(8));

        auto actual_component_type = DXIL::ComponentType::U32;
        auto effective_component_type = actual_component_type;
        unsigned stride = 0;

        if (tags && get_constant_metadata<unsigned>(tags, 0) == 0)
        {
            // Sampled format.
            actual_component_type =
                normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata<unsigned>(tags, 1)));
            effective_component_type = get_effective_typed_resource_type(actual_component_type);
        }
        else
        {
            // Structured/Raw buffers, just use uint for good measure, we'll bitcast as needed.
            // Field 1 is stride, but we don't care about that unless we will support an SSBO path.
            if (tags)
                stride = get_constant_metadata<unsigned>(tags, 1);
        }

        unsigned alignment = resource_kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));

        DescriptorTableEntry local_table_entry = {};
        int local_root_signature_entry = get_local_root_signature_entry(
            ResourceClass::SRV, bind_space, bind_register, local_table_entry);

        bool need_resource_remapping = local_root_signature_entry < 0 ||
                                       local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

        D3DBinding d3d_binding = {
            get_remapping_stage(execution_model), resource_kind, index, bind_space, bind_register, range_size, alignment,
        };
        VulkanSRVBinding vulkan_binding = { { bind_space, bind_register }, {} };

        if (need_resource_remapping && resource_mapping_iface &&
            !resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
        {
            // We may be rejected if the unbound range has 1 non-bindless descriptor.
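            // Retry once, treating the range as a single descriptor rather than an
            // unbounded array; only if that also fails do we give up.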
            bool retry = d3d_binding.range_size == UINT32_MAX;
            if (retry)
            {
                d3d_binding.range_size = 1;
                range_size = 1;
            }

            if (!retry || !resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
            {
                LOGE("Failed to remap SRV %u:%u.\n", bind_space, bind_register);
                return false;
            }
        }

        auto &access_meta = srv_access_tracking[index];

        AliasedAccess aliased_access;
        if (!analyze_aliased_access(access_meta,
                                    need_resource_remapping ? vulkan_binding.buffer_binding.descriptor_type :
                                                              VulkanDescriptorType::BufferDeviceAddress,
                                    aliased_access))
        {
            return false;
        }

        if (range_size != 1 && resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
        {
            if (range_size == ~0u)
            {
                builder.addExtension("SPV_EXT_descriptor_indexing");
                builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
            }

            if ((resource_kind == DXIL::ResourceKind::StructuredBuffer || resource_kind == DXIL::ResourceKind::RawBuffer) &&
                vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
            {
                builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
            }
            else if (resource_kind == DXIL::ResourceKind::StructuredBuffer ||
                     resource_kind == DXIL::ResourceKind::RawBuffer ||
                     resource_kind == DXIL::ResourceKind::TypedBuffer)
            {
                builder.addExtension("SPV_EXT_descriptor_indexing");
                builder.addCapability(spv::CapabilityUniformTexelBufferArrayDynamicIndexingEXT);
            }
            else
                builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
        }

        srv_index_to_reference.resize(std::max(srv_index_to_reference.size(), size_t(index + 1)));
        srv_index_to_offset.resize(std::max(srv_index_to_offset.size(), size_t(index + 1)));

        if (!get_ssbo_offset_buffer_id(srv_index_to_offset[index], vulkan_binding.buffer_binding,
                                       vulkan_binding.offset_binding, resource_kind, alignment))
            return false;

        BindlessInfo bindless_info = {};
        bindless_info.type = DXIL::ResourceType::SRV;
        bindless_info.component = effective_component_type;
        bindless_info.kind = resource_kind;
        bindless_info.desc_set = vulkan_binding.buffer_binding.descriptor_set;
        bindless_info.binding = vulkan_binding.buffer_binding.binding;
        bindless_info.descriptor_type = vulkan_binding.buffer_binding.descriptor_type;
        bindless_info.relaxed_precision =
            actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);
        bindless_info.debug.stride = stride;

        if (local_root_signature_entry >= 0)
        {
            auto &entry = local_root_signature[local_root_signature_entry];
            if (entry.type == LocalRootSignatureType::Table)
            {
                if (!vulkan_binding.buffer_binding.bindless.use_heap)
                {
                    LOGE("Table SBT entries must be bindless.\n");
                    return false;
                }

                if (!var_meta.is_lib_variable)
                {
                    LOGE("Local root signature requires global lib variables.\n");
                    return false;
                }

                uint32_t heap_offset = local_table_entry.offset_in_heap;
                heap_offset += bind_register - local_table_entry.register_index;

                auto &ref = srv_index_to_reference[index];

                if (aliased_access.requires_alias_decoration)
                {
                    ref.var_alias_group = create_bindless_heap_variable_alias_group(
                        bindless_info, aliased_access.raw_declarations);
                }
                else if (aliased_access.override_primary_component_types)
                {
                    auto tmp_info = bindless_info;
                    tmp_info.component = aliased_access.primary_component_type;
                    tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
                    ref.var_id = create_bindless_heap_variable(tmp_info);
                }
                else
                {
                    ref.var_id = create_bindless_heap_variable(bindless_info);
                }

                ref.aliased = aliased_access.requires_alias_decoration;
                ref.base_offset = heap_offset;
                ref.base_resource_is_array = range_size != 1;
                ref.stride = stride;
                ref.bindless = true;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
			}
			else
			{
				// Otherwise, we simply refer to the SBT directly to obtain a pointer.
				if (resource_kind != DXIL::ResourceKind::RawBuffer &&
				    resource_kind != DXIL::ResourceKind::StructuredBuffer &&
				    resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
				{
					LOGE("SRV SBT root descriptors must be raw buffers, structured buffers or RTAS.\n");
					return false;
				}

				auto &ref = srv_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.stride = stride;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			if (resource_kind != DXIL::ResourceKind::RawBuffer &&
			    resource_kind != DXIL::ResourceKind::StructuredBuffer &&
			    resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
			{
				LOGE("BDA root descriptors must be raw buffers, structured buffers or RTAS.\n");
				return false;
			}

			auto &ref = srv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index;
			ref.stride = stride;
			ref.resource_kind = resource_kind;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("SRV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer_binding.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = srv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else if (aliased_access.override_primary_component_types)
			{
				auto tmp_info = bindless_info;
				tmp_info.component = aliased_access.primary_component_type;
				tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
				ref.var_id = create_bindless_heap_variable(tmp_info);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.stride = stride;
			ref.bindless = true;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer_binding.root_constant_index, 4, false);
		}
		else
		{
			auto sampled_type_id = get_type_id(effective_component_type, 1, 1);
			spv::Id type_id = 0;
			auto storage = spv::StorageClassUniformConstant;

			if (resource_kind == DXIL::ResourceKind::RTAccelerationStructure)
			{
				type_id = builder.makeAccelerationStructureType();
			}
			else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				storage = spv::StorageClassStorageBuffer;
				// Defer typing the SSBOs.
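				// The SSBO's element type is resolved later from the observed access
				// patterns (see the create_raw_ssbo_variable() call below), so no type
				// is created here and type_id deliberately stays 0.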
			}
			else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::InputAttachment)
			{
				if (execution_model != spv::ExecutionModelFragment)
				{
					LOGE("InputAttachments can only be used in pixel shaders.\n");
					return false;
				}

				if (range_size != 1)
				{
					LOGE("Cannot bind input attachment to array of descriptors.\n");
					return false;
				}

				if (resource_kind != DXIL::ResourceKind::Texture2D && resource_kind != DXIL::ResourceKind::Texture2DMS)
				{
					LOGE("Can only bind Texture2D and Texture2DMS to input attachments.\n");
					return false;
				}

				type_id = builder.makeImageType(sampled_type_id, spv::DimSubpassData, false, false,
				                                image_dimension_is_multisampled(resource_kind), 2,
				                                spv::ImageFormatUnknown);
			}
			else
			{
				type_id = builder.makeImageType(sampled_type_id, image_dimension_from_resource_kind(resource_kind),
				                                false, image_dimension_is_arrayed(resource_kind),
				                                image_dimension_is_multisampled(resource_kind), 1,
				                                spv::ImageFormatUnknown);

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}
			}

			auto &ref = srv_index_to_reference[index];

			if (type_id)
				ref.var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str());
			else if (aliased_access.requires_alias_decoration)
				ref.var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
			else
			{
				assert(aliased_access.raw_declarations.size() == 1);
				ref.var_id = create_raw_ssbo_variable(aliased_access.raw_declarations.front(), range_size, name);
			}

			if (actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type))
				builder.addDecoration(ref.var_id, spv::DecorationRelaxedPrecision);

			const auto decorate_variable = [&](spv::Id id) {
				builder.addDecoration(id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
				builder.addDecoration(id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);

				if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
				{
					// Make it crystal clear this is a read-only SSBO which cannot observe changes from other SSBO writes.
					// Do not emit Aliased here even for type aliases
					// since we cannot observe writes from other descriptors anyways.
					builder.addDecoration(id, spv::DecorationNonWritable);
					builder.addDecoration(id, spv::DecorationRestrict);
				}
				else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::InputAttachment &&
				         vulkan_binding.buffer_binding.input_attachment_index != -1u)
				{
					builder.addDecoration(id, spv::DecorationInputAttachmentIndex,
					                      int(vulkan_binding.buffer_binding.input_attachment_index));
				}
			};

			if (ref.var_id)
				decorate_variable(ref.var_id);
			for (auto &var : ref.var_alias_group)
				decorate_variable(var.var_id);

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.base_resource_is_array = range_size != 1;
			ref.stride = stride;
			ref.resource_kind = resource_kind;

			// Counteract any offsets.
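			// For example (hypothetical HLSL): Texture2D T[8] : register(t4) accessed as
			// T[NonUniformResourceIndex(i)] makes DXIL pass 4 + i into createHandle(), so
			// base_offset = -4 turns that back into a plain 0-based array index.
			// The t4 register value here is purely illustrative.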
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			if (ref.var_id)
			{
				auto &meta = handle_to_resource_meta[ref.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = ref.var_id;
				meta.storage = storage;
				meta.component_type = actual_component_type;

				if (aliased_access.override_primary_component_types)
				{
					meta.component_type = aliased_access.primary_component_type;
					meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;
				}
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;
				meta.stride = stride;
				meta.var_id = var.var_id;
				meta.storage = storage;
			}
		}
	}

	return true;
}

bool Converter::Impl::get_ssbo_offset_buffer_id(spv::Id &buffer_id, const VulkanBinding &buffer_binding,
                                                const VulkanBinding &offset_binding, DXIL::ResourceKind kind,
                                                unsigned alignment)
{
	buffer_id = 0;

	bool is_buffer_type = kind == DXIL::ResourceKind::StructuredBuffer ||
	                      kind == DXIL::ResourceKind::RawBuffer ||
	                      kind == DXIL::ResourceKind::TypedBuffer;
	if (!is_buffer_type)
		return true;

	bool use_offsets = false;

	// If we're emitting an SSBO where we expect small alignment, we'll need to carry forward an "offset".
	if (buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
	{
		if (kind != DXIL::ResourceKind::TypedBuffer && (alignment & (options.ssbo_alignment - 1)) != 0)
		{
			if (!buffer_binding.bindless.use_heap)
			{
				LOGE("SSBO offset is only supported for bindless SSBOs.\n");
				return false;
			}

			if (offset_binding.bindless.use_heap)
			{
				LOGE("SSBO offset buffer must not be a bindless buffer.\n");
				return false;
			}

			use_offsets = true;
		}
	}
	else if (options.bindless_typed_buffer_offsets && buffer_binding.bindless.use_heap)
	{
		use_offsets = true;
	}

	if (use_offsets)
	{
		BindlessInfo bindless_info = {};
		bindless_info.descriptor_type = VulkanDescriptorType::SSBO;
		bindless_info.type = DXIL::ResourceType::SRV;
		bindless_info.offsets = true;
		bindless_info.desc_set = offset_binding.descriptor_set;
		bindless_info.binding = offset_binding.binding;
		bindless_info.component = DXIL::ComponentType::U32;
		bindless_info.kind = DXIL::ResourceKind::RawBuffer;
		buffer_id = create_bindless_heap_variable(bindless_info);
	}

	return true;
}

bool Converter::Impl::get_uav_image_format(DXIL::ResourceKind resource_kind,
                                           DXIL::ComponentType actual_component_type,
                                           const AccessTracking &access_meta, spv::ImageFormat &format)
{
	if (resource_kind == DXIL::ResourceKind::FeedbackTexture2D ||
	    resource_kind == DXIL::ResourceKind::FeedbackTexture2DArray)
	{
		format = spv::ImageFormatR64ui;
		builder().addExtension("SPV_EXT_shader_image_int64");
		builder().addCapability(spv::CapabilityInt64ImageEXT);
		return true;
	}
	else if (resource_kind != DXIL::ResourceKind::RawBuffer && resource_kind != DXIL::ResourceKind::StructuredBuffer)
	{
		// For any typed resource, we need to check if the resource is being read.
		// To avoid StorageReadWithoutFormat, we emit a format based on the component type.
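		// Illustrative (hypothetical HLSL): a RWTexture2D<float> that is read maps to
		// component type F32 and gets ImageFormatR32f below; any component type outside
		// U32/I32/F32/U64 falls through to the default case and is only accepted when
		// the typed_uav_read_without_format option is set.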
		if (access_meta.has_read)
		{
			if (options.typed_uav_read_without_format && !access_meta.has_atomic)
			{
				builder().addCapability(spv::CapabilityStorageImageReadWithoutFormat);
				format = spv::ImageFormatUnknown;
			}
			else
			{
				switch (actual_component_type)
				{
				case DXIL::ComponentType::U32:
					format = spv::ImageFormatR32ui;
					break;

				case DXIL::ComponentType::I32:
					format = spv::ImageFormatR32i;
					break;

				case DXIL::ComponentType::F32:
					format = spv::ImageFormatR32f;
					break;

				case DXIL::ComponentType::U64:
					format = spv::ImageFormatR64ui;
					builder().addExtension("SPV_EXT_shader_image_int64");
					builder().addCapability(spv::CapabilityInt64ImageEXT);
					break;

				default:
					LOGE("Reading from UAV, but component type does not conform to U32, I32, F32 or U64. "
					     "typed_uav_read_without_format option must be enabled.\n");
					return false;
				}
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_uavs(const llvm::MDNode *uavs, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_uavs = uavs->getNumOperands();
	for (unsigned i = 0; i < num_uavs; i++)
	{
		auto *uav = llvm::cast<llvm::MDNode>(uavs->getOperand(i));
		auto var_meta = get_resource_variable_meta(uav);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(uav, 0);
		auto name = get_resource_name_metadata(uav, refl);
		unsigned bind_space = get_constant_metadata(uav, 3);
		unsigned bind_register = get_constant_metadata(uav, 4);
		unsigned range_size = get_constant_metadata(uav, 5);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical UAV binding detected.\n");
			return false;
		}

		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(uav, 6));

		// Magic resource that does not actually exist.
		if (index == ags.uav_magic_resource_type_index || index == nvapi.uav_magic_resource_type_index)
			continue;

		bool has_counter = get_constant_metadata(uav, 8) != 0;
		bool is_rov = get_constant_metadata(uav, 9) != 0;
		// ROV implies coherent in Vulkan memory models.
		bool globally_coherent = get_constant_metadata(uav, 7) != 0 || is_rov;

		llvm::MDNode *tags = nullptr;
		if (uav->getNumOperands() >= 11 && uav->getOperand(10))
			tags = llvm::dyn_cast<llvm::MDNode>(uav->getOperand(10));

		unsigned stride = 0;
		spv::ImageFormat format = spv::ImageFormatUnknown;
		auto actual_component_type = DXIL::ComponentType::U32;
		auto effective_component_type = actual_component_type;
		auto &access_meta = uav_access_tracking[index];

		if (globally_coherent)
			execution_mode_meta.declares_globallycoherent_uav = true;
		if (is_rov)
			execution_mode_meta.declares_rov = true;

		// We shouldn't need this, but dxilconv is broken.
		if (access_meta.has_counter)
			has_counter = true;

		// If the shader has device-memory memory barriers, we need to support this.
		// GLSL450 memory model does not do this for us by default.
		// coherent: memory variable where reads and writes are coherent with reads and
		// writes from other shader invocations.
		// We have two options:
		// - Slap Coherent on it.
		// - Use Vulkan memory model and make use of MakeVisibleKHR/MakeAvailableKHR flags in an OpMemoryBarrier.
		//   This would flush and invalidate any incoherent caches as necessary.
		// For now, slapping coherent on all UAVs is good enough.
		// When we move to full Vulkan memory model we can do a slightly better job.
		// If no UAV actually needs globallycoherent we can demote any barriers to workgroup barriers,
		// which is hopefully more optimal if the compiler understands the intent ...
		// Only promote resources which actually need some kind of coherence.
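		// Illustrative sketch of the two strategies in SPIR-V terms (not emitted here):
		//
		//   ; Option 1 (GLSL450 memory model): decorate the variable.
		//   OpDecorate %uav Coherent
		//
		//   ; Option 2 (Vulkan memory model): let the barrier do the work.
		//   OpMemoryBarrier %scope_device %semantics
		//   ; where %semantics includes MakeAvailableKHR | MakeVisibleKHR.
		//
		// %uav, %scope_device and %semantics are placeholder names.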
		if (shader_analysis.require_uav_thread_group_coherence && access_meta.has_written && access_meta.has_read &&
		    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
		{
			globally_coherent = true;
		}

		if (resource_kind == DXIL::ResourceKind::FeedbackTexture2D ||
		    resource_kind == DXIL::ResourceKind::FeedbackTexture2DArray)
		{
			// 64-bit atomics make things a bit nicer.
			actual_component_type = DXIL::ComponentType::U64;
			effective_component_type = get_effective_typed_resource_type(actual_component_type);
		}
		else if (tags && get_constant_metadata(tags, 0) == 0)
		{
			// Sampled format.
			actual_component_type =
			    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(tags, 1)));

			if (access_meta.has_atomic_64bit)
			{
				// The component type in DXIL is u32, even if the resource itself is u64 in meta reflection data ...
				// This is also the case for signed components. Always use R64UI here.
				actual_component_type = DXIL::ComponentType::U64;
			}

			effective_component_type = get_effective_typed_resource_type(actual_component_type);
		}
		else
		{
			// Structured/Raw buffers, just use uint for good measure, we'll bitcast as needed.
			// Field 1 is stride, but we don't care about that unless we will support an SSBO path.
			format = spv::ImageFormatR32ui;
			if (tags)
				stride = get_constant_metadata(tags, 1);
		}

		unsigned alignment = resource_kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));

		if (!get_uav_image_format(resource_kind, actual_component_type, access_meta, format))
			return false;

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::UAV, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DUAVBinding d3d_binding = {};
		d3d_binding.counter = has_counter;
		d3d_binding.binding = { get_remapping_stage(execution_model), resource_kind, index,
			                    bind_space, bind_register, range_size, alignment };
		VulkanUAVBinding vulkan_binding = { { bind_space, bind_register }, { bind_space + 1, bind_register }, {} };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_uav(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_uav(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap UAV %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(access_meta,
		                            need_resource_remapping ? vulkan_binding.buffer_binding.descriptor_type :
		                                                      VulkanDescriptorType::BufferDeviceAddress,
		                            aliased_access))
		{
			return false;
		}

		uav_index_to_reference.resize(std::max(uav_index_to_reference.size(), size_t(index + 1)));
		uav_index_to_counter.resize(std::max(uav_index_to_counter.size(), size_t(index + 1)));
		uav_index_to_offset.resize(std::max(uav_index_to_offset.size(), size_t(index + 1)));

		if (!get_ssbo_offset_buffer_id(uav_index_to_offset[index], vulkan_binding.buffer_binding,
		                               vulkan_binding.offset_binding, resource_kind, alignment))
			return false;

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			if (has_counter)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
					builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
				else
					builder.addCapability(spv::CapabilityStorageTexelBufferArrayDynamicIndexingEXT);
			}

			if ((resource_kind == DXIL::ResourceKind::StructuredBuffer || resource_kind == DXIL::ResourceKind::RawBuffer) &&
			    vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
			}
			else if (resource_kind == DXIL::ResourceKind::StructuredBuffer ||
			         resource_kind == DXIL::ResourceKind::RawBuffer ||
			         resource_kind == DXIL::ResourceKind::TypedBuffer)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityStorageTexelBufferArrayDynamicIndexingEXT);
			}
			else
				builder.addCapability(spv::CapabilityStorageImageArrayDynamicIndexing);
		}

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::UAV;
		bindless_info.component = effective_component_type;
		bindless_info.kind = resource_kind;
		bindless_info.desc_set = vulkan_binding.buffer_binding.descriptor_set;
		bindless_info.binding = vulkan_binding.buffer_binding.binding;
		bindless_info.format = format;
		bindless_info.uav_read = access_meta.has_read;
		bindless_info.uav_written = access_meta.has_written;
		bindless_info.uav_coherent = globally_coherent;
		bindless_info.descriptor_type = vulkan_binding.buffer_binding.descriptor_type;
		bindless_info.relaxed_precision =
		    actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);
		bindless_info.debug.stride = stride;

		// If we emit two SSBOs which both access the same buffer, we must emit Aliased decoration to be safe.
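		// For example (hypothetical shader): if the same u# register is accessed both as
		// uint and as uint64_t, analyze_aliased_access() produces two raw declarations,
		// which end up roughly like this (GLSL syntax purely for illustration):
		//
		//   layout(set = 0, binding = 0) buffer SSBO_u32 { uint     elems[]; };
		//   layout(set = 0, binding = 0) buffer SSBO_u64 { uint64_t elems[]; };
		//
		// Both variables target the same descriptor, hence the Aliased decoration.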
		bindless_info.aliased = aliased_access.requires_alias_decoration;

		BindlessInfo counter_info = {};
		counter_info.type = DXIL::ResourceType::UAV;
		counter_info.component = DXIL::ComponentType::U32;
		counter_info.desc_set = vulkan_binding.counter_binding.descriptor_set;
		counter_info.binding = vulkan_binding.counter_binding.binding;

		if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
		{
			counter_info.kind = DXIL::ResourceKind::RawBuffer;
			counter_info.counters = true;
		}
		else if (options.physical_storage_buffer &&
		         vulkan_binding.counter_binding.descriptor_type != VulkanDescriptorType::TexelBuffer)
		{
			counter_info.kind = DXIL::ResourceKind::Invalid;
			counter_info.counters = true;
		}
		else
		{
			counter_info.kind = DXIL::ResourceKind::TypedBuffer;
			counter_info.uav_read = true;
			counter_info.uav_written = true;
			counter_info.uav_coherent = globally_coherent;
			counter_info.format = spv::ImageFormatR32ui;
		}

		ReferenceVkMemoryModel vkmm = {};
		if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
		{
			// For UAV we just slap it on everything.
			vkmm.non_private = true;
			vkmm.auto_visibility = globally_coherent || is_rov;
		}

		if (local_root_signature_entry >= 0)
		{
			auto &entry = local_root_signature[local_root_signature_entry];
			if (entry.type == LocalRootSignatureType::Table)
			{
				if (!vulkan_binding.buffer_binding.bindless.use_heap)
				{
					LOGE("Table SBT entries must be bindless.\n");
					return false;
				}

				if (!var_meta.is_lib_variable)
				{
					LOGE("Local root signature requires global lib variables.\n");
					return false;
				}

				uint32_t heap_offset = local_table_entry.offset_in_heap;
				heap_offset += bind_register - local_table_entry.register_index;

				auto &ref = uav_index_to_reference[index];

				if (aliased_access.requires_alias_decoration)
				{
					ref.var_alias_group =
					    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
				}
				else if (aliased_access.override_primary_component_types)
				{
					auto tmp_info = bindless_info;
					tmp_info.component = aliased_access.primary_component_type;
					tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
					ref.var_id = create_bindless_heap_variable(tmp_info);
				}
				else
				{
					ref.var_id = create_bindless_heap_variable(bindless_info);
				}

				ref.aliased = aliased_access.requires_alias_decoration;
				ref.base_offset = heap_offset;
				ref.stride = stride;
				ref.bindless = true;
				ref.base_resource_is_array = range_size != 1;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
				ref.vkmm = vkmm;

				if (has_counter)
				{
					if (!vulkan_binding.counter_binding.bindless.use_heap)
					{
						LOGE("Table SBT entries must be bindless.\n");
						return false;
					}

					heap_offset = local_table_entry.offset_in_heap;
					heap_offset += bind_register - local_table_entry.register_index;

					spv::Id counter_var_id = create_bindless_heap_variable(counter_info);

					auto &counter_ref = uav_index_to_counter[index];
					counter_ref.var_id = counter_var_id;
					counter_ref.base_offset = heap_offset;
					counter_ref.stride = 4;
					counter_ref.bindless = true;
					counter_ref.base_resource_is_array = range_size != 1;
					counter_ref.local_root_signature_entry = local_root_signature_entry;

					// Signals the underlying type of the counter buffer.
					counter_ref.resource_kind =
					    counter_info.counters ? DXIL::ResourceKind::RawBuffer : DXIL::ResourceKind::TypedBuffer;
				}
			}
			else
			{
				// Otherwise, we simply refer to the SBT directly to obtain a pointer.
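				// The SBT member for a root descriptor holds a raw GPU VA stored as u32x2
				// (see emit_shader_record_buffer_block_type() below); at access time it is
				// bitcast to a PhysicalStorageBuffer pointer rather than going through a
				// descriptor.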
				if (resource_kind != DXIL::ResourceKind::RawBuffer &&
				    resource_kind != DXIL::ResourceKind::StructuredBuffer)
				{
					LOGE("UAV SBT root descriptors must be raw buffers or structured buffers.\n");
					return false;
				}

				auto &ref = uav_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.stride = stride;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
				ref.vkmm = vkmm;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			if (resource_kind != DXIL::ResourceKind::RawBuffer && resource_kind != DXIL::ResourceKind::StructuredBuffer)
			{
				LOGE("BDA root descriptors must be raw buffers or structured buffers.\n");
				return false;
			}

			auto &ref = uav_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.stride = stride;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("UAV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer_binding.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = uav_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else if (aliased_access.override_primary_component_types)
			{
				auto tmp_info = bindless_info;
				tmp_info.component = aliased_access.primary_component_type;
				tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
				ref.var_id = create_bindless_heap_variable(tmp_info);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.stride = stride;
			ref.bindless = true;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer_binding.root_constant_index, 4, false);
			}

			if (has_counter)
			{
				if (vulkan_binding.counter_binding.bindless.use_heap)
				{
					spv::Id counter_var_id = create_bindless_heap_variable(counter_info);
					heap_offset = vulkan_binding.counter_binding.bindless.heap_root_offset;
					if (range_size != 1 && !var_meta.is_lib_variable)
						heap_offset -= bind_register;

					auto &counter_ref = uav_index_to_counter[index];
					counter_ref.var_id = counter_var_id;
					counter_ref.push_constant_member =
					    vulkan_binding.counter_binding.root_constant_index + root_descriptor_count;
					counter_ref.base_offset = heap_offset;
					counter_ref.stride = 4;
					counter_ref.bindless = true;
					counter_ref.base_resource_is_array = range_size != 1;

					// Signals the underlying type of the counter buffer.
					counter_ref.resource_kind = counter_info.kind;
				}
				else
				{
					LOGE("If base UAV uses bindless heap, UAV counter must also do so.\n");
					return false;
				}
			}
		}
		else
		{
			spv::Id var_id = 0;
			Vector<RawDeclarationVariable> var_alias_group;
			spv::StorageClass storage;

			if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				// TODO: Consider implementing aliased buffers which all refer to the same buffer,
				// but which can exploit alignment per-instruction.
				// This is impractical, since BufferLoad/Store in DXIL does not have alignment (4 bytes is assumed),
				// so just unroll.
				// To make good use of this, we'll need apps to use SM 6.2 RawBufferLoad/Store, which does have explicit alignment.
				// We'll likely need to mess around with Aliased decoration as well, which might have other effects ...
				storage = spv::StorageClassStorageBuffer;

				if (aliased_access.requires_alias_decoration)
					var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
				else
				{
					assert(aliased_access.raw_declarations.size() == 1);
					var_id = create_raw_ssbo_variable(aliased_access.raw_declarations.front(), range_size, name);
				}
			}
			else
			{
				// Treat default as texel buffer, as it's the more compatible way of implementing buffer types in DXIL.
				auto element_type_id = get_type_id(effective_component_type, 1, 1);
				spv::Id type_id = builder.makeImageType(element_type_id, image_dimension_from_resource_kind(resource_kind),
				                                        false, image_dimension_is_arrayed(resource_kind),
				                                        image_dimension_is_multisampled(resource_kind), 2, format);

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}

				storage = spv::StorageClassUniformConstant;
				var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str());

				if (actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type))
					builder.addDecoration(var_id, spv::DecorationRelaxedPrecision);
			}

			auto &ref = uav_index_to_reference[index];
			ref.var_id = var_id;
			ref.var_alias_group = std::move(var_alias_group);
			ref.aliased = aliased_access.requires_alias_decoration;
			ref.stride = stride;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			const auto decorate_variable = [&](spv::Id id) {
				builder.addDecoration(id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
				builder.addDecoration(id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);

				if (!access_meta.has_read)
					builder.addDecoration(id, spv::DecorationNonReadable);
				if (!access_meta.has_written)
					builder.addDecoration(id, spv::DecorationNonWritable);
				if (globally_coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
					builder.addDecoration(id, spv::DecorationCoherent);
				if (aliased_access.requires_alias_decoration)
					builder.addDecoration(id, spv::DecorationAliased);
			};

			if (var_id)
				decorate_variable(var_id);
			for (auto &var : ref.var_alias_group)
				decorate_variable(var.var_id);

			spv::Id counter_var_id = 0;
			if (has_counter)
			{
				if (vulkan_binding.counter_binding.bindless.use_heap)
				{
					LOGE("Cannot use bindless UAV counters along with non-bindless UAVs.\n");
					return false;
				}

				spv::StorageClass counter_storage;
				spv::Id type_id;

				if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
				{
					spv::Id uint_type = builder.makeUintType(32);
					type_id = builder.makeStructType({ uint_type }, "AtomicCounterSSBO");
					builder.addDecoration(type_id, spv::DecorationBlock);
					builder.addMemberName(type_id, 0, "counter");
					builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
					counter_storage = spv::StorageClassStorageBuffer;
				}
				else
				{
					// Treat default as texel buffer, as it's the more compatible way of implementing buffer types in DXIL.
					auto element_type_id = get_type_id(DXIL::ComponentType::U32, 1, 1);
					type_id = builder.makeImageType(element_type_id, spv::DimBuffer, false, false, false, 2, format);
					counter_storage = spv::StorageClassUniformConstant;
				}

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}

				counter_var_id = create_variable(counter_storage, type_id,
				                                 name.empty() ? nullptr : (name + "Counter").c_str());

				builder.addDecoration(counter_var_id, spv::DecorationDescriptorSet,
				                      vulkan_binding.counter_binding.descriptor_set);
				builder.addDecoration(counter_var_id, spv::DecorationBinding, vulkan_binding.counter_binding.binding);

				auto &counter_ref = uav_index_to_counter[index];
				counter_ref.var_id = counter_var_id;
				counter_ref.stride = 4;
				counter_ref.base_resource_is_array = range_size != 1;
				counter_ref.resource_kind = counter_storage == spv::StorageClassStorageBuffer ?
				                            DXIL::ResourceKind::RawBuffer : DXIL::ResourceKind::TypedBuffer;
			}

			if (var_id)
			{
				auto &meta = handle_to_resource_meta[var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = var_id;
				meta.storage = storage;
				meta.component_type = actual_component_type;
				meta.vkmm = vkmm;

				if (aliased_access.override_primary_component_types)
				{
					meta.component_type = aliased_access.primary_component_type;
					meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;
				}
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = var.var_id;
				meta.storage = storage;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;
				meta.vkmm = vkmm;
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		auto var_meta = get_resource_variable_meta(cbv);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(cbv, 0);
		auto name = get_resource_name_metadata(cbv, refl);
		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);
		unsigned range_size = get_constant_metadata(cbv, 5);
		unsigned cbv_size = get_constant_metadata(cbv, 6);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical CBV binding detected.\n");
			return false;
		}

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::CBV, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::CBuffer,
			                       index, bind_space, bind_register, range_size, 0 };
		VulkanCBVBinding vulkan_binding = {};
		vulkan_binding.buffer = { bind_space, bind_register };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap CBV %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		auto &access_meta = cbv_access_tracking[index];

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(access_meta, VulkanDescriptorType::UBO, aliased_access))
			return false;

		cbv_index_to_reference.resize(std::max(cbv_index_to_reference.size(), size_t(index + 1)));

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			if (vulkan_binding.buffer.bindless.use_heap && options.bindless_cbv_ssbo_emulation)
				builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
			else
				builder.addCapability(spv::CapabilityUniformBufferArrayDynamicIndexing);
		}

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::CBV;
		bindless_info.kind = DXIL::ResourceKind::CBuffer;
		bindless_info.desc_set = vulkan_binding.buffer.descriptor_set;
		bindless_info.binding = vulkan_binding.buffer.binding;
		bindless_info.component = aliased_access.primary_component_type;
		bindless_info.raw_vecsize = aliased_access.primary_raw_vecsize;

		if (local_root_signature_entry >= 0)
		{
			auto &entry = local_root_signature[local_root_signature_entry];
			if (entry.type == LocalRootSignatureType::Table)
			{
				if (!vulkan_binding.buffer.bindless.use_heap)
				{
					LOGE("Table SBT entries must be bindless.\n");
					return false;
				}

				uint32_t heap_offset = local_table_entry.offset_in_heap;
				heap_offset += bind_register - local_table_entry.register_index;

				if (!var_meta.is_lib_variable)
				{
					LOGE("Local root signature requires global lib variables.\n");
					return false;
				}

				auto &ref = cbv_index_to_reference[index];

				if (aliased_access.requires_alias_decoration)
				{
					ref.var_alias_group =
					    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
				}
				else
				{
					ref.var_id = create_bindless_heap_variable(bindless_info);
				}

				ref.base_offset = heap_offset;
				ref.base_resource_is_array = range_size != 1;
				ref.bindless = true;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = DXIL::ResourceKind::CBuffer;
			}
			else
			{
				auto &ref = cbv_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = DXIL::ResourceKind::CBuffer;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.push_constant)
		{
			if (root_constant_id == 0)
			{
				LOGE("Must have setup push constant block to use root constant path.\n");
				return false;
			}

			auto &ref = cbv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.push_constant_member = vulkan_binding.push.offset_in_words + root_descriptor_count;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("Constant", vulkan_binding.push.offset_in_words, cbv_size, false);
		}
		else if (vulkan_binding.buffer.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			auto &ref = cbv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer.root_constant_index;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("CBV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = cbv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.push_constant_member = vulkan_binding.buffer.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.base_resource_is_array = range_size != 1;
			ref.bindless = true;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer.root_constant_index, 4, false);
			}
		}
		else
		{
			auto &ref = cbv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_ubo_variable_alias_group(aliased_access.raw_declarations, range_size, name, cbv_size);
			}
			else
			{
				assert(aliased_access.raw_declarations.size() == 1);
				ref.var_id = create_ubo_variable(aliased_access.raw_declarations.front(), range_size, name, cbv_size);
			}

			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			if (ref.var_id)
			{
				auto &meta = handle_to_resource_meta[ref.var_id];
				meta = {};
				meta.kind = ref.resource_kind;
				meta.var_id = ref.var_id;
				meta.storage = spv::StorageClassUniform;
				meta.component_type = aliased_access.primary_component_type;
				meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;

				builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
				builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = ref.resource_kind;
				meta.var_id = var.var_id;
				meta.storage = spv::StorageClassUniform;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;

				builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
				builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
			}

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index(
				    "PushCBV",
				    Converter::pack_desc_set_binding_to_virtual_offset(vulkan_binding.buffer.descriptor_set,
				                                                       vulkan_binding.buffer.binding),
				    0, false);
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_samplers(const llvm::MDNode *samplers, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_samplers = samplers->getNumOperands();
	for (unsigned i = 0; i < num_samplers; i++)
	{
		auto *sampler = llvm::cast<llvm::MDNode>(samplers->getOperand(i));
		auto var_meta = get_resource_variable_meta(sampler);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(sampler, 0);
		auto name = get_resource_name_metadata(sampler, refl);
		unsigned bind_space = get_constant_metadata(sampler, 3);
		unsigned bind_register = get_constant_metadata(sampler, 4);
		unsigned range_size = get_constant_metadata(sampler, 5);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical Sampler binding detected.\n");
			return false;
		}

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			// This capability also covers samplers.
			builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
		}

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::Sampler, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::Sampler,
			                       index, bind_space, bind_register, range_size, 0 };
		VulkanBinding vulkan_binding = { bind_space, bind_register };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap sampler %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		sampler_index_to_reference.resize(std::max(sampler_index_to_reference.size(), size_t(index + 1)));

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::Sampler;
		bindless_info.kind = DXIL::ResourceKind::Sampler;
		bindless_info.desc_set = vulkan_binding.descriptor_set;
		bindless_info.binding = vulkan_binding.binding;

		if (local_root_signature_entry >= 0)
		{
			// Samplers can only live in table entries.
			if (!vulkan_binding.bindless.use_heap)
			{
				LOGE("Table SBT entries must be bindless.\n");
				return false;
			}

			spv::Id var_id = create_bindless_heap_variable(bindless_info);

			uint32_t heap_offset = local_table_entry.offset_in_heap;
			heap_offset += bind_register - local_table_entry.register_index;

			if (!var_meta.is_lib_variable)
			{
				LOGE("Local root signature requires global lib variables.\n");
				return false;
			}

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.base_offset = heap_offset;
			ref.bindless = true;
			ref.local_root_signature_entry = local_root_signature_entry;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;
		}
		else if (vulkan_binding.bindless.use_heap)
		{
			spv::Id var_id = create_bindless_heap_variable(bindless_info);

			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.push_constant_member = vulkan_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.bindless = true;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("SamplerTable", vulkan_binding.root_constant_index, 4, false);
		}
		else
		{
			spv::Id type_id = builder.makeSamplerType();

			if (range_size != 1)
			{
				if (range_size == ~0u)
					type_id = builder.makeRuntimeArray(type_id);
				else
					type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
			}

			spv::Id var_id = create_variable(spv::StorageClassUniformConstant, type_id,
			                                 name.empty() ? nullptr : name.c_str());

			builder.addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.descriptor_set);
			builder.addDecoration(var_id, spv::DecorationBinding, vulkan_binding.binding);

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;
		}
	}

	return true;
}

bool Converter::Impl::scan_srvs(ResourceRemappingInterface *iface, const llvm::MDNode *srvs, ShaderStage stage)
{
	unsigned num_srvs = srvs->getNumOperands();
	for (unsigned i = 0; i < num_srvs; i++)
	{
		auto *srv = llvm::cast<llvm::MDNode>(srvs->getOperand(i));
		unsigned index = get_constant_metadata(srv, 0);
		unsigned bind_space = get_constant_metadata(srv, 3);
		unsigned bind_register = get_constant_metadata(srv, 4);
		unsigned range_size = get_constant_metadata(srv, 5);
		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(srv, 6));

		D3DBinding d3d_binding = { stage, resource_kind, index, bind_space, bind_register, range_size };
		VulkanSRVBinding vulkan_binding = {};
		if (iface && !iface->remap_srv(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_samplers(ResourceRemappingInterface *iface, const llvm::MDNode *samplers, ShaderStage stage)
{
	unsigned num_samplers = samplers->getNumOperands();
	for (unsigned i = 0; i < num_samplers; i++)
	{
		auto *sampler = llvm::cast<llvm::MDNode>(samplers->getOperand(i));
		unsigned index = get_constant_metadata(sampler, 0);
		unsigned bind_space = get_constant_metadata(sampler, 3);
		unsigned bind_register = get_constant_metadata(sampler, 4);
		unsigned range_size = get_constant_metadata(sampler, 5);

		D3DBinding d3d_binding = { stage, DXIL::ResourceKind::Sampler, index, bind_space, bind_register, range_size };
		VulkanBinding vulkan_binding = {};
		if (iface && !iface->remap_sampler(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_cbvs(ResourceRemappingInterface *iface, const llvm::MDNode *cbvs, ShaderStage stage)
{
	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		unsigned index = get_constant_metadata(cbv, 0);
		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);
		unsigned range_size = get_constant_metadata(cbv, 5);

		D3DBinding d3d_binding = { stage, DXIL::ResourceKind::CBuffer, index, bind_space, bind_register, range_size };
		VulkanCBVBinding vulkan_binding = {};
		if (iface && !iface->remap_cbv(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_uavs(ResourceRemappingInterface *iface, const llvm::MDNode *uavs, ShaderStage stage)
{
	unsigned num_uavs = uavs->getNumOperands();
	for (unsigned i = 0; i < num_uavs; i++)
	{
		auto *uav = llvm::cast<llvm::MDNode>(uavs->getOperand(i));
		unsigned index = get_constant_metadata(uav, 0);
		unsigned bind_space = get_constant_metadata(uav, 3);
		unsigned bind_register = get_constant_metadata(uav, 4);
		unsigned range_size = get_constant_metadata(uav, 5);
		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(uav, 6));
		bool has_counter = get_constant_metadata(uav, 8) != 0;

		D3DUAVBinding d3d_binding = { { stage, resource_kind, index, bind_space, bind_register, range_size },
			                          has_counter };
		VulkanUAVBinding vulkan_binding = {};
		if (iface && !iface->remap_uav(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::require_arrayed_root_constants() const
{
	if (!resource_mapping_iface)
		return false;

	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return false;

	auto *metas = resource_meta->getOperand(0);
	if (!metas->getOperand(2))
		return false;

	auto *cbvs = llvm::dyn_cast<llvm::MDNode>(metas->getOperand(2));
	if (!cbvs)
		return false;

	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		auto var_meta = get_resource_variable_meta(cbv);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(cbv, 0);
		auto itr = cbv_access_tracking.find(index);
		if (itr == cbv_access_tracking.end())
			continue;

		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::CBV, bind_space, bind_register, local_table_entry);
		if (local_root_signature_entry >= 0)
			continue;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::CBuffer,
			                       index, bind_space, bind_register, UINT32_MAX, 0 };
		VulkanCBVBinding vulkan_binding = {};
		vulkan_binding.buffer = { bind_space, bind_register };
		if (!resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
			continue;
		if (!vulkan_binding.push_constant)
			continue;

		if (itr->second.dynamically_indexed_cbv)
			return true;
	}

	return false;
}

void Converter::Impl::emit_root_constants(unsigned num_descriptors, unsigned num_constant_words)
{
	auto &builder = spirv_module.get_builder();

	bool array_root_constants = require_arrayed_root_constants();

	// Root constants cannot be dynamically indexed in DXIL, so emit them as members.
	Vector<spv::Id> members((array_root_constants ? 1 : num_constant_words) + num_descriptors);

	// Emit root descriptors as u32x2 to work around missing SGPR promotion on RADV.
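	// The resulting block looks roughly like this (GLSL-style illustration, assuming two
	// root descriptors and non-arrayed constants; member names are placeholders, offsets
	// follow the decorations emitted below):
	//
	//   layout(push_constant) uniform RootConstants
	//   {
	//       uvec2 _0; // root descriptor 0, offset 0
	//       uvec2 _1; // root descriptor 1, offset 8
	//       uint  _2; // root constant word 0, offset 16
	//       // ...
	//   } registers;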
	for (unsigned i = 0; i < num_descriptors; i++)
		members[i] = builder.makeVectorType(builder.makeUintType(32), 2);

	if (array_root_constants)
	{
		spv::Id type_id = builder.makeUintType(32);
		type_id = builder.makeArrayType(type_id, builder.makeUintConstant(num_constant_words), 4);
		builder.addDecoration(type_id, spv::DecorationArrayStride, 4);
		members[num_descriptors] = type_id;
	}
	else
	{
		for (unsigned i = 0; i < num_constant_words; i++)
			members[i + num_descriptors] = builder.makeUintType(32);
	}

	spv::Id type_id = get_struct_type(members, 0, "RootConstants");
	builder.addDecoration(type_id, spv::DecorationBlock);

	for (unsigned i = 0; i < num_descriptors; i++)
		builder.addMemberDecoration(type_id, i, spv::DecorationOffset, sizeof(uint64_t) * i);

	for (unsigned i = 0; i < (array_root_constants ? 1 : num_constant_words); i++)
	{
		builder.addMemberDecoration(type_id, i + num_descriptors, spv::DecorationOffset,
		                            sizeof(uint64_t) * num_descriptors + sizeof(uint32_t) * i);
	}

	if (array_root_constants)
		builder.addMemberName(type_id, num_descriptors, "root_constants_and_tables");

	if (options.inline_ubo_enable)
	{
		root_constant_id = create_variable(spv::StorageClassUniform, type_id, "registers");
		builder.addDecoration(root_constant_id, spv::DecorationDescriptorSet, options.inline_ubo_descriptor_set);
		builder.addDecoration(root_constant_id, spv::DecorationBinding, options.inline_ubo_descriptor_binding);
	}
	else
		root_constant_id = create_variable(spv::StorageClassPushConstant, type_id, "registers");

	root_descriptor_count = num_descriptors;
	root_constant_num_words = num_constant_words;
	root_constant_arrayed = array_root_constants;
}

static bool execution_model_is_ray_tracing(spv::ExecutionModel model)
{
	switch (model)
	{
	case spv::ExecutionModelRayGenerationKHR:
	case spv::ExecutionModelCallableKHR:
	case spv::ExecutionModelIntersectionKHR:
	case spv::ExecutionModelMissKHR:
	case spv::ExecutionModelClosestHitKHR:
	case spv::ExecutionModelAnyHitKHR:
		return true;

	default:
		return false;
	}
}

spv::Id Converter::Impl::emit_shader_record_buffer_block_type(bool physical_storage)
{
	if (local_root_signature.empty())
		return 0;

	auto &builder = spirv_module.get_builder();
	spv::Id type_id;
	Vector<spv::Id> member_types;
	Vector<uint32_t> offsets;
	member_types.reserve(local_root_signature.size());
	offsets.reserve(local_root_signature.size());
	shader_record_buffer_types.reserve(local_root_signature.size());
	uint32_t current_offset = 0;

	for (auto &elem : local_root_signature)
	{
		switch (elem.type)
		{
		case LocalRootSignatureType::Constants:
		{
			spv::Id array_size_id = builder.makeUintConstant(elem.constants.num_words);
			spv::Id u32_type = builder.makeUintType(32);
			spv::Id member_type_id = builder.makeArrayType(u32_type, array_size_id, 4);
			builder.addDecoration(member_type_id, spv::DecorationArrayStride, 4);
			member_types.push_back(member_type_id);
			offsets.push_back(current_offset);
			current_offset += 4 * elem.constants.num_words;
			shader_record_buffer_types.push_back(member_type_id);
			break;
		}

		case LocalRootSignatureType::Descriptor:
		{
			// A 64-bit integer which we will bitcast to a physical storage buffer later.
			// Emit it as u32x2 as otherwise we don't get SGPR promotion on ACO as of right now.
spv::Id member_type_id = builder.makeVectorType(builder.makeUintType(32), 2); member_types.push_back(member_type_id); current_offset = (current_offset + 7) & ~7; offsets.push_back(current_offset); current_offset += 8; shader_record_buffer_types.push_back(member_type_id); break; } case LocalRootSignatureType::Table: { spv::Id member_type_id = builder.makeVectorType(builder.makeUintType(32), 2); member_types.push_back(member_type_id); current_offset = (current_offset + 7) & ~7; offsets.push_back(current_offset); current_offset += 8; shader_record_buffer_types.push_back(member_type_id); break; } default: return false; } } type_id = get_struct_type(member_types, 0, "SBTBlock"); builder.addDecoration(type_id, spv::DecorationBlock); for (size_t i = 0; i < local_root_signature.size(); i++) { builder.addMemberDecoration(type_id, i, spv::DecorationOffset, offsets[i]); if (physical_storage) builder.addMemberDecoration(type_id, i, spv::DecorationNonWritable); } return type_id; } bool Converter::Impl::emit_shader_record_buffer() { spv::Id type_id = emit_shader_record_buffer_block_type(false); if (type_id) shader_record_buffer_id = create_variable(spv::StorageClassShaderRecordBufferKHR, type_id, "SBT"); return true; } static bool local_root_signature_matches(const LocalRootSignatureEntry &entry, ResourceClass resource_class, uint32_t space, uint32_t binding, DescriptorTableEntry &local_table_entry) { switch (entry.type) { case LocalRootSignatureType::Constants: return resource_class == ResourceClass::CBV && entry.constants.register_space == space && entry.constants.register_index == binding; case LocalRootSignatureType::Descriptor: return entry.descriptor.type == resource_class && entry.descriptor.register_space == space && entry.descriptor.register_index == binding; case LocalRootSignatureType::Table: for (auto &table_entry : entry.table_entries) { if (table_entry.type == resource_class && table_entry.register_space == space && table_entry.register_index <= binding && ((table_entry.num_descriptors_in_range == ~0u) || ((binding - table_entry.register_index) < table_entry.num_descriptors_in_range))) { local_table_entry = table_entry; return true; } } return false; default: return false; } } int Converter::Impl::get_local_root_signature_entry(ResourceClass resource_class, uint32_t space, uint32_t binding, DescriptorTableEntry &local_table_entry) const { auto itr = std::find_if(local_root_signature.begin(), local_root_signature.end(), [&](const LocalRootSignatureEntry &entry) { return local_root_signature_matches(entry, resource_class, space, binding, local_table_entry); }); if (itr != local_root_signature.end()) return int(itr - local_root_signature.begin()); else return -1; } bool Converter::Impl::emit_resources_global_mapping() { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return true; auto *metas = resource_meta->getOperand(0); if (metas->getOperand(0)) if (!emit_resources_global_mapping(DXIL::ResourceType::SRV, llvm::dyn_cast(metas->getOperand(0)))) return false; if (metas->getOperand(1)) if (!emit_resources_global_mapping(DXIL::ResourceType::UAV, llvm::dyn_cast(metas->getOperand(1)))) return false; if (metas->getOperand(2)) if (!emit_resources_global_mapping(DXIL::ResourceType::CBV, llvm::dyn_cast(metas->getOperand(2)))) return false; if (metas->getOperand(3)) if (!emit_resources_global_mapping(DXIL::ResourceType::Sampler, llvm::dyn_cast(metas->getOperand(3)))) return false; return true; } void 
Converter::Impl::get_shader_model(const llvm::Module &module, String *model, uint32_t *major, uint32_t *minor) { auto *resource_meta = module.getNamedMetadata("dx.shaderModel"); if (!resource_meta) { if (major) *major = 6; if (minor) *minor = 0; if (model) model->clear(); } else { auto *meta = resource_meta->getOperand(0); if (model) *model = get_string_metadata(meta, 0); if (major) *major = get_constant_metadata(meta, 1); if (minor) *minor = get_constant_metadata(meta, 2); } } Converter::Impl::RawBufferMeta Converter::Impl::get_raw_buffer_meta(DXIL::ResourceType resource_type, unsigned meta_index) { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return { DXIL::ResourceKind::Invalid, 0 }; auto *metas = resource_meta->getOperand(0); auto &resource_list = metas->getOperand(uint32_t(resource_type)); if (!resource_list) return { DXIL::ResourceKind::Invalid, 0 }; auto *entries = llvm::cast(resource_list); unsigned num_entries = entries->getNumOperands(); for (unsigned i = 0; i < num_entries; i++) { auto *entry = llvm::cast(entries->getOperand(i)); if (get_constant_metadata(entry, 0) == meta_index) { RawBufferMeta meta = {}; meta.kind = DXIL::ResourceKind(get_constant_metadata(entry, 6)); unsigned tag_index = resource_type == DXIL::ResourceType::SRV ? 8 : 10; llvm::MDNode *tags = nullptr; if (entry->getNumOperands() > tag_index && entry->getOperand(tag_index)) tags = llvm::dyn_cast(entry->getOperand(tag_index)); if (tags) meta.stride = get_constant_metadata(tags, 1); return meta; } } return { DXIL::ResourceKind::Invalid, 0 }; } uint32_t Converter::Impl::find_binding_meta_index(uint32_t binding_range_lo, uint32_t binding_range_hi, uint32_t binding_space, DXIL::ResourceType resource_type) { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return UINT32_MAX; auto *metas = resource_meta->getOperand(0); auto &resource_list = metas->getOperand(uint32_t(resource_type)); if (!resource_list) return UINT32_MAX; auto *entries = llvm::cast(resource_list); unsigned num_entries = entries->getNumOperands(); for (unsigned i = 0; i < num_entries; i++) { auto *entry = llvm::cast(entries->getOperand(i)); uint32_t index = get_constant_metadata(entry, 0); uint32_t bind_space = get_constant_metadata(entry, 3); uint32_t bind_register = get_constant_metadata(entry, 4); uint32_t range_size = get_constant_metadata(entry, 5); if (binding_space != bind_space) continue; if (binding_range_lo >= bind_register && (range_size == UINT32_MAX || (binding_range_hi < bind_register + range_size))) { return index; } } return UINT32_MAX; } bool Converter::Impl::emit_descriptor_heap_size_ubo() { spv::Id u32_type = builder().makeUintType(32); spv::Id block_type = builder().makeStructType({ u32_type }, "DescriptorHeapSizeUBO"); builder().addMemberName(block_type, 0, "count"); builder().addDecoration(block_type, spv::DecorationBlock); builder().addMemberDecoration(block_type, 0, spv::DecorationOffset, 0); auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::ResourceDescriptorHeapSize)]; spv::Id var_id = create_variable(spv::StorageClassUniform, block_type, "DescriptorHeapSize"); builder().addDecoration(var_id, spv::DecorationDescriptorSet, mapping.desc_set); builder().addDecoration(var_id, spv::DecorationBinding, mapping.desc_binding); instrumentation.descriptor_heap_size_var_id = var_id; return true; } bool Converter::Impl::emit_descriptor_heap_introspection_buffer() 
bool Converter::Impl::emit_descriptor_heap_introspection_buffer()
{
	if (instrumentation.descriptor_heap_introspection_var_id != 0)
		return true;

	// We need to know the size of the descriptor heap. Rather than passing this
	// through a separate descriptor, we can just query the SSBO size of the
	// side-band SSBO. It is designed to have a size equal to the descriptor heap.
	// It is somewhat hacky that we can ask for a global heap of RTAS, which gets us this descriptor.
	VulkanSRVBinding vulkan_binding = {};
	auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::RawDescriptorHeapView)];

	if (mapping.kind != MetaDescriptorKind::ReadonlySSBO &&
	    mapping.kind != MetaDescriptorKind::UBOContainingBDA &&
	    mapping.kind != MetaDescriptorKind::Invalid)
		return false;

	bool use_full_descriptor = mapping.kind != MetaDescriptorKind::UBOContainingBDA;

	if (mapping.kind == MetaDescriptorKind::Invalid)
	{
		// Legacy proxy. The RTAS heap does what we want in the legacy model.
		D3DBinding d3d_binding = {
			get_remapping_stage(execution_model),
			DXIL::ResourceKind::RTAccelerationStructure,
			0, UINT32_MAX, UINT32_MAX, UINT32_MAX, 0,
		};
		if (!resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
			return false;

		if (vulkan_binding.buffer_binding.descriptor_type != VulkanDescriptorType::SSBO &&
		    vulkan_binding.buffer_binding.descriptor_type != VulkanDescriptorType::Identity)
		{
			LOGE("Dummy SSBO must be an SSBO.\n");
			return false;
		}
	}
	else
	{
		vulkan_binding.buffer_binding.descriptor_set = mapping.desc_set;
		vulkan_binding.buffer_binding.binding = mapping.desc_binding;
	}

	if (options.physical_address_descriptor_stride == 0)
	{
		LOGE("physical_address_descriptor_stride must be set.\n");
		return false;
	}

	spv::Id u32_type = builder().makeUintType(32);
	uint32_t elems = options.physical_address_descriptor_stride;
	if (options.instruction_instrumentation.enabled || !use_full_descriptor)
		u32_type = builder().makeVectorType(u32_type, 2);
	else
		elems *= 2;

	spv::Id u32_array_type = builder().makeArrayType(u32_type, builder().makeUintConstant(elems), 0);
	builder().addDecoration(u32_array_type, spv::DecorationArrayStride,
	                        (options.instruction_instrumentation.enabled || !use_full_descriptor) ? 8 : 4);
	spv::Id inner_struct_type = get_struct_type({ u32_array_type }, 0, "DescriptorHeapRawPayload");
	builder().addMemberDecoration(inner_struct_type, 0, spv::DecorationOffset, 0);
	spv::Id inner_struct_array_type = builder().makeRuntimeArray(inner_struct_type);
	builder().addDecoration(inner_struct_array_type, spv::DecorationArrayStride,
	                        8u * options.physical_address_descriptor_stride);

	bool sync_val = options.instruction_instrumentation.enabled &&
	                options.instruction_instrumentation.type ==
	                InstructionInstrumentationType::BufferSynchronizationValidation;

	spv::Id block_type_id = get_struct_type(
	    { inner_struct_array_type }, 0,
	    use_full_descriptor ? "DescriptorHeapRobustnessSSBO" : "DescriptorHeapRawBlock");
	builder().addDecoration(block_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
	if (!sync_val)
	{
		builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonWritable);
		if (use_full_descriptor)
			builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonReadable);
	}
	builder().addMemberName(block_type_id, 0, "descriptors");

	spv::Id var_id;
	if (use_full_descriptor)
	{
		var_id = create_variable(spv::StorageClassStorageBuffer, block_type_id, "DescriptorHeapRobustness");
	}
	else
	{
		// Wrap the descriptor as a plain BDA.
		spv::Id ptr_type = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, block_type_id);
		spv::Id ubo_block_type = builder().makeStructType({ ptr_type }, "DescriptorHeapRawPayloadPtr");
		builder().addMemberName(ubo_block_type, 0, "ptr");
		builder().addMemberDecoration(ubo_block_type, 0, spv::DecorationOffset, 0);
		builder().addDecoration(ubo_block_type, spv::DecorationBlock);
		var_id = create_variable(spv::StorageClassUniform, ubo_block_type, "DescriptorHeapRaw");
		instrumentation.descriptor_heap_introspection_block_ptr_type_id = ptr_type;
	}

	builder().addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
	builder().addDecoration(var_id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);
	instrumentation.descriptor_heap_introspection_var_id = var_id;
	instrumentation.descriptor_heap_introspection_is_bda = !use_full_descriptor;

	if (sync_val)
	{
		instrumentation.invocation_id_var_id =
		    create_variable(spv::StorageClassPrivate, builder().makeUintType(32), "InvocationID");
	}

	return true;
}
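// Rough GLSL equivalents of the two shapes emitted above, with N = physical_address_descriptor_stride
// (illustrative only; the instrumented variant uses uvec2 raw[N] with stride 8 instead):
//
//   // use_full_descriptor path:
//   layout(set = S, binding = B) readonly buffer DescriptorHeapRobustnessSSBO {
//       struct { uint raw[2 * N]; } descriptors[];
//   };
//
//   // UBOContainingBDA path: the same runtime array is reached through a BDA pointer in a UBO:
//   layout(set = S, binding = B) uniform DescriptorHeapRawPayloadPtr { DescriptorHeapRawBlock *ptr; };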
bool Converter::Impl::emit_global_heaps()
{
	Vector<AnnotateHandleReference *> annotations;
	for (auto &use : llvm_annotate_handle_uses)
		annotations.push_back(&use.second);

	// Ensure reproducible codegen since we iterate over an unordered map.
	std::sort(annotations.begin(), annotations.end(),
	          [](const AnnotateHandleReference *a, const AnnotateHandleReference *b) {
		          return a->ordinal < b->ordinal;
	          });

	for (auto *annotation : annotations)
	{
		BindlessInfo info = {};
		auto actual_component_type = DXIL::ComponentType::U32;
		info.format = spv::ImageFormatUnknown;

		if (annotation->resource_type != DXIL::ResourceType::CBV &&
		    annotation->resource_kind != DXIL::ResourceKind::RawBuffer &&
		    annotation->resource_kind != DXIL::ResourceKind::StructuredBuffer)
		{
			actual_component_type = normalize_component_type(annotation->component_type);
			if (annotation->tracking.has_atomic_64bit)
			{
				// The component type in DXIL is u32, even if the resource itself is u64 in meta reflection data ...
				// This is also the case for signed components. Always use R64UI here.
				actual_component_type = DXIL::ComponentType::U64;
			}
		}
		else if (annotation->resource_type == DXIL::ResourceType::UAV)
		{
			info.format = spv::ImageFormatR32ui;
		}

		auto effective_component_type = get_effective_typed_resource_type(actual_component_type);
		info.type = annotation->resource_type;
		info.component = effective_component_type;
		info.kind = annotation->resource_kind;
		info.relaxed_precision =
		    actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);

		if (info.type == DXIL::ResourceType::UAV)
		{
			// See emit_uavs() for details around coherent and memory model shenanigans ...
			if (annotation->coherent)
				execution_mode_meta.declares_globallycoherent_uav = true;
			if (annotation->rov)
				execution_mode_meta.declares_rov = true;

			// Do not attempt to track read and write here to figure out if this resource
			// in particular needs to be coherent. It's plausible that the write and read can happen
			// across two different accesses to ResourceDescriptorHeap[]. Don't take any chances here ...
			if (shader_analysis.require_uav_thread_group_coherence &&
			    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
			{
				annotation->coherent = true;
			}

			if (annotation->resource_kind == DXIL::ResourceKind::StructuredBuffer ||
			    annotation->resource_kind == DXIL::ResourceKind::RawBuffer)
			{
				// In case there is aliasing through different declarations,
				// we cannot emit NonWritable or NonReadable safely. Assume full read-write.
				// Be a bit careful with typed resources, since read-write + typed is not always supported.
				annotation->tracking.has_read = true;
				annotation->tracking.has_written = true;
			}

			info.uav_coherent = annotation->coherent || annotation->rov;
			info.uav_read = annotation->tracking.has_read;
			info.uav_written = annotation->tracking.has_written;

			if (!get_uav_image_format(annotation->resource_kind, actual_component_type,
			                          annotation->tracking, info.format))
			{
				return false;
			}
		}

		unsigned stride = annotation->stride;
		unsigned alignment = info.kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));
		D3DBinding d3d_binding = {
			get_remapping_stage(execution_model), info.kind, 0, UINT32_MAX, UINT32_MAX, UINT32_MAX, alignment,
		};
		VulkanBinding vulkan_binding = {};
		bool remap_success = false;

		if (resource_mapping_iface)
		{
			switch (info.type)
			{
			case DXIL::ResourceType::SRV:
			{
				VulkanSRVBinding vulkan_srv_binding = {};
				remap_success = resource_mapping_iface->remap_srv(d3d_binding, vulkan_srv_binding);
				vulkan_binding = vulkan_srv_binding.buffer_binding;
				if (!get_ssbo_offset_buffer_id(annotation->offset_buffer_id, vulkan_srv_binding.buffer_binding,
				                               vulkan_srv_binding.offset_binding, annotation->resource_kind,
				                               alignment))
				{
					return false;
				}
				break;
			}

			case DXIL::ResourceType::UAV:
			{
				VulkanUAVBinding vulkan_uav_binding = {};
				D3DUAVBinding d3d_uav_binding = {};
				d3d_uav_binding.binding = d3d_binding;
				d3d_uav_binding.counter = annotation->counter;
				remap_success = resource_mapping_iface->remap_uav(d3d_uav_binding, vulkan_uav_binding);
				vulkan_binding = vulkan_uav_binding.buffer_binding;
				if (!get_ssbo_offset_buffer_id(annotation->offset_buffer_id, vulkan_uav_binding.buffer_binding,
				                               vulkan_uav_binding.offset_binding, annotation->resource_kind,
				                               alignment))
				{
					return false;
				}

				if (annotation->counter)
				{
					auto &counter_binding = vulkan_uav_binding.counter_binding;
					BindlessInfo counter_info = {};
					annotation->counter_reference.base_resource_is_array = true;
					annotation->counter_reference.push_constant_member = UINT32_MAX;
					annotation->counter_reference.stride = 4;
					annotation->counter_reference.bindless = true;

					counter_info.type = DXIL::ResourceType::UAV;
					counter_info.component = DXIL::ComponentType::U32;
					counter_info.desc_set = counter_binding.descriptor_set;
					counter_info.binding = counter_binding.binding;

					if (counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
					{
						counter_info.kind = DXIL::ResourceKind::RawBuffer;
						counter_info.counters = true;
					}
					else if (options.physical_storage_buffer &&
					         counter_binding.descriptor_type != VulkanDescriptorType::TexelBuffer)
					{
						counter_info.kind = DXIL::ResourceKind::Invalid;
						counter_info.counters = true;
					}
					else
					{
						counter_info.kind = DXIL::ResourceKind::TypedBuffer;
						counter_info.uav_read = true;
						counter_info.uav_written = true;
						counter_info.uav_coherent = false;
						counter_info.format = spv::ImageFormatR32ui;
					}

					annotation->counter_reference.resource_kind = counter_info.kind;
					annotation->counter_reference.var_id = create_bindless_heap_variable(counter_info);
				}
				break;
			}

			case DXIL::ResourceType::CBV:
			{
				VulkanCBVBinding vulkan_cbv_binding = {};
				remap_success = resource_mapping_iface->remap_cbv(d3d_binding, vulkan_cbv_binding);
				if (vulkan_cbv_binding.push_constant)
				{
					LOGE("Cannot use push constants for SM 6.6 bindless.\n");
					return false;
				}
				vulkan_binding = vulkan_cbv_binding.buffer;
				vulkan_binding.descriptor_type = VulkanDescriptorType::UBO;
				break;
			}

			case DXIL::ResourceType::Sampler:
				remap_success = resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding);
				break;
			}
		}

		if (!remap_success)
			return false;

		if (!vulkan_binding.bindless.use_heap)
		{
			LOGE("SM 6.6 bindless references must be bindless.\n");
			return false;
		}

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(annotation->tracking, vulkan_binding.descriptor_type, aliased_access))
			return false;

		info.desc_set = vulkan_binding.descriptor_set;
		info.binding = vulkan_binding.binding;
		info.descriptor_type = vulkan_binding.descriptor_type;
		info.aliased = aliased_access.requires_alias_decoration;
		info.debug.stride = annotation->stride;

		annotation->reference.bindless = true;
		annotation->reference.base_resource_is_array = true;
		annotation->reference.push_constant_member = UINT32_MAX;
		annotation->reference.stride = annotation->stride;
		annotation->reference.resource_kind = annotation->resource_kind;
		annotation->reference.coherent = annotation->coherent || annotation->rov;
		annotation->reference.rov = annotation->rov;

		if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
		{
			annotation->reference.vkmm.non_private = info.type == DXIL::ResourceType::UAV;
			annotation->reference.vkmm.auto_visibility = annotation->coherent || annotation->rov;
		}

		if (aliased_access.requires_alias_decoration)
		{
			annotation->reference.var_alias_group =
			    create_bindless_heap_variable_alias_group(info, aliased_access.raw_declarations);
		}
		else if (aliased_access.override_primary_component_types)
		{
			auto tmp_info = info;
			tmp_info.component = aliased_access.primary_component_type;
			tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
			annotation->reference.var_id = create_bindless_heap_variable(tmp_info);
		}
		else
			annotation->reference.var_id = create_bindless_heap_variable(info);

		annotation->reference.aliased = aliased_access.requires_alias_decoration;
	}

	return true;
}
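// Conceptually, the loop above is what lowers SM 6.6 style heap access, e.g. (illustrative HLSL):
//
//   RWStructuredBuffer<float4> buf = ResourceDescriptorHeap[NonUniformResourceIndex(index)];
//
// Each unique dx.op.annotateHandle use becomes one AnnotateHandleReference, and its
// reference.var_id ends up pointing at a runtime-sized descriptor array which the
// heap index selects into at access time.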
bool Converter::Impl::emit_ray_query_globals()
{
	if (shader_analysis.ray_query.uses_non_direct_indexing)
	{
		auto &b = builder();
		spv::Id type_id = b.makeRayQueryType();
		if (shader_analysis.ray_query.uses_divergent_handles)
		{
			type_id = b.makeArrayType(
			    type_id, b.makeUintConstant(shader_analysis.ray_query.num_ray_query_alloca), 0);
		}
		ray_query.global_query_objects_id = create_variable(spv::StorageClassPrivate, type_id, "RayQueryHeap");
	}
	return true;
}

bool Converter::Impl::emit_resources()
{
	unsigned num_root_descriptors = 0;
	unsigned num_root_constant_words = 0;

	if (resource_mapping_iface)
	{
		num_root_descriptors = resource_mapping_iface->get_root_descriptor_count();
		num_root_constant_words = resource_mapping_iface->get_root_constant_word_count();
	}

	if (num_root_constant_words != 0 || num_root_descriptors != 0)
		emit_root_constants(num_root_descriptors, num_root_constant_words);

	if (execution_model_is_ray_tracing(execution_model))
		if (!emit_shader_record_buffer())
			return false;

	if (!emit_global_heaps())
		return false;

	if (options.descriptor_heap_robustness)
	{
		auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::ResourceDescriptorHeapSize)];
		if (mapping.kind == MetaDescriptorKind::UBOContainingConstant)
		{
			// Use legacy path.
			if (!emit_descriptor_heap_size_ubo())
				return false;
		}
		else
		{
			if (!emit_descriptor_heap_introspection_buffer())
				return false;
		}
	}

	if (options.instruction_instrumentation.enabled &&
	    (options.instruction_instrumentation.type == InstructionInstrumentationType::ExpectAssume ||
	     options.instruction_instrumentation.type ==
	     InstructionInstrumentationType::BufferSynchronizationValidation))
	{
		// Failure is not a big deal.
		emit_descriptor_heap_introspection_buffer();
	}

	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return true;

	auto *metas = resource_meta->getOperand(0);
	llvm::MDNode *reflection_metas = nullptr;

	if (bitcode_reflection_parser)
	{
		auto &reflection_module = bitcode_reflection_parser->get_module();
		auto *reflection_resource_meta = reflection_module.getNamedMetadata("dx.resources");
		if (reflection_resource_meta)
			reflection_metas = reflection_resource_meta->getOperand(0);
	}

	const llvm::MDNode *reflection_type_metas[4] = {};
	const llvm::MDNode *type_metas[4] = {};

	for (unsigned i = 0; i < 4; i++)
	{
		if (metas->getOperand(i))
		{
			type_metas[i] = llvm::dyn_cast<llvm::MDNode>(metas->getOperand(i));
			if (reflection_metas)
				reflection_type_metas[i] = llvm::dyn_cast<llvm::MDNode>(reflection_metas->getOperand(i));
		}
	}

	if (type_metas[0])
		if (!emit_srvs(type_metas[0], reflection_type_metas[0]))
			return false;
	if (type_metas[1])
		if (!emit_uavs(type_metas[1], reflection_type_metas[1]))
			return false;
	if (type_metas[2])
		if (!emit_cbvs(type_metas[2], reflection_type_metas[2]))
			return false;
	if (type_metas[3])
		if (!emit_samplers(type_metas[3], reflection_type_metas[3]))
			return false;

	for (auto &alloc : alloca_tracking)
	{
		// Now that we have emitted resources, we can determine which alloca -> CBV punchthroughs to accept.
		if (!analyze_alloca_cbv_forwarding_post_resource_emit(*this, alloc.second))
			return false;
	}

	if (!emit_ray_query_globals())
		return false;

	return true;
}

void Converter::Impl::scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser)
{
	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return;

	auto *metas = resource_meta->getOperand(0);
	auto stage = get_shader_stage(bitcode_parser);

	if (metas->getOperand(0))
		if (!scan_srvs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(0)), stage))
			return;
	if (metas->getOperand(1))
		if (!scan_uavs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(1)), stage))
			return;
	if (metas->getOperand(2))
		if (!scan_cbvs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(2)), stage))
			return;
	if (metas->getOperand(3))
		if (!scan_samplers(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(3)), stage))
			return;
}

ShaderStage Converter::Impl::get_remapping_stage(spv::ExecutionModel execution_model)
{
	switch (execution_model)
	{
	case spv::ExecutionModelVertex:
		return ShaderStage::Vertex;
	case spv::ExecutionModelTessellationControl:
		return ShaderStage::Hull;
	case spv::ExecutionModelTessellationEvaluation:
		return ShaderStage::Domain;
	case spv::ExecutionModelGeometry:
		return ShaderStage::Geometry;
	case spv::ExecutionModelFragment:
		return ShaderStage::Pixel;
	case spv::ExecutionModelGLCompute:
		return ShaderStage::Compute;
	case spv::ExecutionModelIntersectionKHR:
		return ShaderStage::Intersection;
	case spv::ExecutionModelClosestHitKHR:
		return ShaderStage::ClosestHit;
	case spv::ExecutionModelMissKHR:
		return ShaderStage::Miss;
	case spv::ExecutionModelAnyHitKHR:
		return ShaderStage::AnyHit;
	case spv::ExecutionModelRayGenerationKHR:
		return ShaderStage::RayGeneration;
	case spv::ExecutionModelCallableKHR:
		return ShaderStage::Callable;
	case spv::ExecutionModelTaskEXT:
		return ShaderStage::Amplification;
	case spv::ExecutionModelMeshEXT:
		return ShaderStage::Mesh;
	default:
		return ShaderStage::Unknown;
	}
}

static inline float half_to_float(uint16_t u16_value)
{
	// Based on the GLM implementation.
	int s = (u16_value >> 15) & 0x1;
	int e = (u16_value >> 10) & 0x1f;
	int m = (u16_value >> 0) & 0x3ff;

	union
	{
		float f32;
		uint32_t u32;
	} u;

	if (e == 0)
	{
		if (m == 0)
		{
			// +/- 0.
			u.u32 = uint32_t(s) << 31;
			return u.f32;
		}
		else
		{
			// Denormal, renormalize into the implied-leading-one form.
			while ((m & 0x400) == 0)
			{
				m <<= 1;
				e--;
			}

			e++;
			m &= ~0x400;
		}
	}
	else if (e == 31)
	{
		if (m == 0)
		{
			// +/- Inf.
			u.u32 = (uint32_t(s) << 31) | 0x7f800000u;
			return u.f32;
		}
		else
		{
			// NaN, preserve the payload bits.
			u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13);
			return u.f32;
		}
	}

	e += 127 - 15;
	m <<= 13;

	u.u32 = (uint32_t(s) << 31) | (e << 23) | m;
	return u.f32;
}
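// Worked example for half_to_float(): 0x3C00 decodes as s = 0, e = 15, m = 0;
// the exponent is rebiased to 15 + (127 - 15) = 127, i.e. 2^0, so the result is 1.0f.
// A denormal such as 0x0001 is renormalized by the shift loop and converts to 2^-24.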
spv::Id Converter::Impl::get_padded_constant_array(spv::Id padded_type_id, const llvm::Constant *constant)
{
	auto &builder = spirv_module.get_builder();
	assert(constant->getType()->getTypeID() == llvm::Type::TypeID::ArrayTyID);
	Vector<spv::Id> constituents;

	if (llvm::isa<llvm::ConstantAggregateZero>(constant))
	{
		return builder.makeNullConstant(padded_type_id);
	}
	else if (auto *agg = llvm::dyn_cast<llvm::ConstantAggregate>(constant))
	{
		constituents.reserve(agg->getNumOperands() + 1);
		for (unsigned i = 0; i < agg->getNumOperands(); i++)
		{
			llvm::Constant *c = agg->getOperand(i);
			if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
				constituents.push_back(get_id_for_undef_constant(undef));
			else
				constituents.push_back(get_id_for_constant(c, 0));
		}
	}
	else if (auto *array = llvm::dyn_cast<llvm::ConstantDataArray>(constant))
	{
		constituents.reserve(array->getType()->getArrayNumElements() + 1);
		for (unsigned i = 0; i < array->getNumElements(); i++)
		{
			llvm::Constant *c = array->getElementAsConstant(i);
			if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
				constituents.push_back(get_id_for_undef_constant(undef));
			else
				constituents.push_back(get_id_for_constant(c, 0));
		}
	}
	else
		return 0;

	// Pad out with one extra null element at the end.
	constituents.push_back(builder.makeNullConstant(get_type_id(constant->getType()->getArrayElementType())));
	return builder.makeCompositeConstant(padded_type_id, constituents);
}

spv::Id Converter::Impl::get_id_for_constant(const llvm::Constant *constant, unsigned forced_width)
{
	auto &builder = spirv_module.get_builder();

	switch (constant->getType()->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		auto f16 = uint16_t(fp->getValueAPF().bitcastToAPInt().getZExtValue());
		if (support_native_fp16_operations())
			return builder.makeFloat16Constant(f16);
		else
			return builder.makeFloatConstant(half_to_float(f16));
	}

	case llvm::Type::TypeID::FloatTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		return builder.makeFloatConstant(fp->getValueAPF().convertToFloat());
	}

	case llvm::Type::TypeID::DoubleTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		return builder.makeDoubleConstant(fp->getValueAPF().convertToDouble());
	}

	case llvm::Type::TypeID::IntegerTyID:
	{
		unsigned integer_width = forced_width ? forced_width : constant->getType()->getIntegerBitWidth();
		int physical_width = physical_integer_bit_width(integer_width);

		switch (physical_width)
		{
		case 1:
			return builder.makeBoolConstant(constant->getUniqueInteger().getZExtValue() != 0);
		case 16:
			return builder.makeUint16Constant(constant->getUniqueInteger().getZExtValue());
		case 32:
			return builder.makeUintConstant(constant->getUniqueInteger().getZExtValue());
		case 64:
			return builder.makeUint64Constant(constant->getUniqueInteger().getZExtValue());
		default:
			return 0;
		}
	}

	case llvm::Type::TypeID::VectorTyID:
	case llvm::Type::TypeID::ArrayTyID:
	case llvm::Type::TypeID::StructTyID:
	{
		Vector<spv::Id> constituents;
		spv::Id type_id = get_type_id(constant->getType());

		if (llvm::isa<llvm::ConstantAggregateZero>(constant))
		{
			return builder.makeNullConstant(type_id);
		}
		else if (auto *agg = llvm::dyn_cast<llvm::ConstantAggregate>(constant))
		{
			constituents.reserve(agg->getNumOperands());
			for (unsigned i = 0; i < agg->getNumOperands(); i++)
			{
				llvm::Constant *c = agg->getOperand(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else if (auto *array = llvm::dyn_cast<llvm::ConstantDataArray>(constant))
		{
			constituents.reserve(array->getType()->getArrayNumElements());
			for (unsigned i = 0; i < array->getNumElements(); i++)
			{
				llvm::Constant *c = array->getElementAsConstant(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else if (auto *vec = llvm::dyn_cast<llvm::ConstantDataVector>(constant))
		{
			constituents.reserve(vec->getType()->getVectorNumElements());
			for (unsigned i = 0; i < vec->getNumElements(); i++)
			{
				llvm::Constant *c = vec->getElementAsConstant(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else
			return 0;

		return builder.makeCompositeConstant(type_id, constituents);
	}

	default:
		return 0;
	}
}

spv::Id Converter::Impl::get_id_for_undef(const llvm::UndefValue *undef)
{
	auto &builder = spirv_module.get_builder();
	if (shader_analysis.global_undefs)
		return builder.createUndefinedConstant(get_type_id(undef->getType()));
	else
		return builder.createUndefined(get_type_id(undef->getType()));
}

spv::Id Converter::Impl::get_id_for_undef_constant(const llvm::UndefValue *undef)
{
	auto &builder = spirv_module.get_builder();
	return builder.createUndefinedConstant(get_type_id(undef->getType()));
}

spv::Id Converter::Impl::get_id_for_value(const llvm::Value *value, unsigned forced_width)
{
	assert(value);

	// Constant expressions must be stamped out at every place they are used,
	// since they technically live at global scope.
	// Do not cache this value in the value map.
	if (auto *cexpr = llvm::dyn_cast<llvm::ConstantExpr>(value))
		return build_constant_expression(*this, cexpr);

	auto itr = value_map.find(value);
	if (itr != value_map.end())
		return itr->second;

	spv::Id ret;
	if (auto *undef = llvm::dyn_cast<llvm::UndefValue>(value))
		ret = get_id_for_undef(undef);
	else if (auto *constant = llvm::dyn_cast<llvm::Constant>(value))
		ret = get_id_for_constant(constant, forced_width);
	else
		ret = spirv_module.allocate_id();

	value_map[value] = ret;
	return ret;
}

static llvm::MDNode *get_entry_point_meta(const llvm::Module &module, const char *entry)
{
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (func_node)
				if (!entry || Converter::entry_point_matches(get_string_metadata(node, 1), entry))
					return node;
		}
	}

	// dxilconv can emit a null hull shader with a non-null patch constant function ... *shrug*
	// I suppose we need to deal with that too.
	if (!entry && num_entry_points)
	{
		auto *node = ep_meta->getOperand(0);
		if (node)
			return node;
	}

	return nullptr;
}

static llvm::MDNode *get_null_entry_point_meta(const llvm::Module &module)
{
	// In DXR, a dummy entry point with a null function pointer owns the shader flags for whatever reason ...
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (!func_node)
				return node;
		}
	}

	return nullptr;
}

Vector<String> Converter::get_entry_points(const LLVMBCParser &parser)
{
	Vector<String> result;
	auto &module = parser.get_module();
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	result.reserve(num_entry_points);

	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (func_node)
				result.push_back(get_string_metadata(node, 1));
		}
	}

	return result;
}

bool Converter::entry_point_matches(const String &mangled, const char *user)
{
	if (is_mangled_entry_point(user))
		return mangled == user;
	else
		return demangle_entry_point(mangled) == user;
}

static String get_entry_point_name(llvm::MDNode *node)
{
	if (!node)
		return {};
	auto &name_node = node->getOperand(1);
	if (name_node)
	{
		auto *str_node = llvm::dyn_cast<llvm::MDString>(name_node);
		if (str_node)
			return get_string_metadata(node, 1);
	}
	return {};
}

static llvm::Function *get_entry_point_function(llvm::MDNode *node)
{
	if (!node)
		return nullptr;
	auto &func_node = node->getOperand(0);
	if (func_node)
		return llvm::dyn_cast<llvm::Function>(llvm::cast<llvm::ValueAsMetadata>(func_node)->getValue());
	else
		return nullptr;
}

static const llvm::MDOperand *get_shader_property_tag(const llvm::MDNode *func_meta, DXIL::ShaderPropertyTag tag)
{
	if (func_meta && func_meta->getNumOperands() >= 5 && func_meta->getOperand(4))
	{
		auto *tag_values = llvm::dyn_cast<llvm::MDNode>(func_meta->getOperand(4));
		unsigned num_pairs = tag_values->getNumOperands() / 2;
		for (unsigned i = 0; i < num_pairs; i++)
			if (tag == static_cast<DXIL::ShaderPropertyTag>(get_constant_metadata(tag_values, 2 * i)))
				return &tag_values->getOperand(2 * i + 1);
	}

	return nullptr;
}

static bool get_execution_model_lib_target(const llvm::Module &module, llvm::MDNode *entry_point_meta)
{
	String model;
	Converter::Impl::get_shader_model(module, &model, nullptr, nullptr);
	return model == "lib";
}

static spv::ExecutionModel get_execution_model(const llvm::Module &module, llvm::MDNode *entry_point_meta)
{
	if (auto *tag = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ShaderKind))
	{
		auto shader_kind = static_cast<DXIL::ShaderKind>(
		    llvm::cast<llvm::ConstantAsMetadata>(*tag)->getValue()->getUniqueInteger().getZExtValue());

		switch (shader_kind)
		{
		case DXIL::ShaderKind::Pixel:
			return spv::ExecutionModelFragment;
		case DXIL::ShaderKind::Vertex:
			return spv::ExecutionModelVertex;
		case DXIL::ShaderKind::Hull:
			return spv::ExecutionModelTessellationControl;
		case DXIL::ShaderKind::Domain:
			return spv::ExecutionModelTessellationEvaluation;
		case DXIL::ShaderKind::Geometry:
			return spv::ExecutionModelGeometry;
		case DXIL::ShaderKind::Compute:
		case DXIL::ShaderKind::Node:
			return spv::ExecutionModelGLCompute;
		case DXIL::ShaderKind::Amplification:
			return spv::ExecutionModelTaskEXT;
		case DXIL::ShaderKind::Mesh:
			return spv::ExecutionModelMeshEXT;
		case DXIL::ShaderKind::RayGeneration:
			return spv::ExecutionModelRayGenerationKHR;
		case DXIL::ShaderKind::Miss:
			return spv::ExecutionModelMissKHR;
		case DXIL::ShaderKind::ClosestHit:
			return spv::ExecutionModelClosestHitKHR;
		case DXIL::ShaderKind::Callable:
			return spv::ExecutionModelCallableKHR;
		case DXIL::ShaderKind::AnyHit:
			return spv::ExecutionModelAnyHitKHR;
		case DXIL::ShaderKind::Intersection:
			return spv::ExecutionModelIntersectionKHR;
		default:
			break;
		}
	}
	else
	{
		// Non-RT shaders tend to rely on having the shader model set in the shaderModel meta node.
		String model;
		Converter::Impl::get_shader_model(module, &model, nullptr, nullptr);
		if (model == "vs")
			return spv::ExecutionModelVertex;
		else if (model == "ps")
			return spv::ExecutionModelFragment;
		else if (model == "hs")
			return spv::ExecutionModelTessellationControl;
		else if (model == "ds")
			return spv::ExecutionModelTessellationEvaluation;
		else if (model == "gs")
			return spv::ExecutionModelGeometry;
		else if (model == "cs")
			return spv::ExecutionModelGLCompute;
		else if (model == "as")
			return spv::ExecutionModelTaskEXT;
		else if (model == "ms")
			return spv::ExecutionModelMeshEXT;
	}

	return spv::ExecutionModelMax;
}
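// For reference, a typical dx.entryPoints record looks roughly like this in the IR
// (field layout per the DXIL metadata format; the exact node numbers are illustrative):
//
//   !dx.entryPoints = !{!5}
//   !5 = !{void ()* @main, !"main", !6 /* signatures */, !7 /* resources */, !8 /* tag/value pairs */}
//
// get_shader_property_tag() walks the tag/value pairs in operand 4; that is where DXR
// shaders publish ShaderKind, since a "lib" target has no meaningful dx.shaderModel stage.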
spv::Id Converter::Impl::get_type_id(const llvm::Type *type, TypeLayoutFlags flags)
{
	auto &builder = spirv_module.get_builder();
	switch (type->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
		return builder.makeFloatType(support_native_fp16_operations() ? 16 : 32);

	case llvm::Type::TypeID::FloatTyID:
		return builder.makeFloatType(32);

	case llvm::Type::TypeID::DoubleTyID:
		return builder.makeFloatType(64);

	case llvm::Type::TypeID::IntegerTyID:
		if (type->getIntegerBitWidth() == 1)
			return builder.makeBoolType();
		else
		{
			auto width = physical_integer_bit_width(type->getIntegerBitWidth());
			return builder.makeIntegerType(width, false);
		}

	case llvm::Type::TypeID::PointerTyID:
	{
		if (DXIL::AddressSpace(type->getPointerAddressSpace()) != DXIL::AddressSpace::PhysicalNodeIO ||
		    (flags & TYPE_LAYOUT_PHYSICAL_BIT) == 0)
		{
			// Have to deal with this from the outside. Should only be relevant for getelementptr and similar instructions.
			LOGE("Cannot reliably convert LLVM pointer type, we cannot differentiate between Function and Private.\n");
			std::terminate();
		}

		// This is free-flowing BDA in DXIL. We'll deal with it as-is.
		// The main complication is that we have to emit Offset information ourselves.
		spv::Id pointee_type = get_type_id(type->getPointerElementType(), flags);
		return builder.makePointer(spv::StorageClassPhysicalStorageBuffer, pointee_type);
	}

	case llvm::Type::TypeID::ArrayTyID:
	{
		if (type->getArrayNumElements() == 0)
			return 0;

		spv::Id array_size_id;
		spv::Id element_type_id;

		// dxbc2dxil emits broken code for TGSM. It's an array of i8, which is absolute nonsense.
		// It then bitcasts the pointer to i32, which isn't legal either.
		if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) == 0 &&
		    type->getArrayElementType()->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
		    type->getArrayElementType()->getIntegerBitWidth() == 8 &&
		    type->getArrayNumElements() % 4 == 0)
		{
			array_size_id = builder.makeUintConstant(type->getArrayNumElements() / 4);
			element_type_id = builder.makeUintType(32);
		}
		else
		{
			array_size_id = builder.makeUintConstant(type->getArrayNumElements());
			element_type_id = get_type_id(type->getArrayElementType(), flags & ~TYPE_LAYOUT_BLOCK_BIT);
		}

		if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) != 0)
		{
			auto size_stride = get_physical_size_for_type(element_type_id);
			uint32_t stride = size_stride.size;

			// We always use scalar layout.
			for (auto &cached_type : cached_physical_array_types)
				if (cached_type.element_type_id == element_type_id && cached_type.array_size_id == array_size_id)
					return cached_type.id;

			spv::Id array_type_id = builder.makeArrayType(element_type_id, array_size_id, stride);
			builder.addDecoration(array_type_id, spv::DecorationArrayStride, stride);
			cached_physical_array_types.push_back({ array_type_id, element_type_id, array_size_id });
			return array_type_id;
		}
		else
		{
			// The glslang emitter deduplicates.
			return builder.makeArrayType(element_type_id, array_size_id, 0);
		}
	}

	case llvm::Type::TypeID::StructTyID:
	{
		auto *struct_type = llvm::cast<llvm::StructType>(type);
		Vector<spv::Id> member_types;
		member_types.reserve(struct_type->getStructNumElements());
		for (unsigned i = 0; i < struct_type->getStructNumElements(); i++)
			member_types.push_back(get_type_id(struct_type->getStructElementType(i), flags & ~TYPE_LAYOUT_BLOCK_BIT));
		return get_struct_type(member_types, flags, "");
	}

	case llvm::Type::TypeID::VectorTyID:
	{
		auto *vec_type = llvm::cast<llvm::VectorType>(type);
		return builder.makeVectorType(get_type_id(vec_type->getElementType()), vec_type->getVectorNumElements());
	}

	case llvm::Type::TypeID::VoidTyID:
		return builder.makeVoidType();

	default:
		return 0;
	}
}
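// Example of the dxbc2dxil TGSM workaround above: a groupshared array which dxbc2dxil
// declares as [64 x i8] (and then accesses through i32 bitcasts) is emitted here as a
// 16-element u32 array instead, so every load and store can stay a plain 32-bit access.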
Converter::Impl::SizeAlignment Converter::Impl::get_physical_size_for_type(spv::Id type_id)
{
	SizeAlignment res = {};
	if (builder().isScalarType(type_id))
	{
		res.size = builder().getScalarTypeWidth(type_id) / 8;
		res.alignment = res.size;
	}
	else if (builder().isVectorType(type_id))
	{
		res = get_physical_size_for_type(builder().getContainedTypeId(type_id));
		res.size *= builder().getNumComponents(type_id);
	}
	else if (builder().isArrayType(type_id))
	{
		res = get_physical_size_for_type(builder().getContainedTypeId(type_id));
		uint32_t array_size = builder().getNumTypeConstituents(type_id);
		// Alignment is inherited from the constituent; we do scalar block layout here.
		res.size *= array_size;
	}
	else if (builder().isStructType(type_id))
	{
		int num_members = builder().getNumTypeConstituents(type_id);
		for (int i = 0; i < num_members; i++)
		{
			uint32_t member_type_id = builder().getContainedTypeId(type_id, i);
			auto member_res = get_physical_size_for_type(member_type_id);
			res.size = (res.size + member_res.alignment - 1) & ~(member_res.alignment - 1);
			res.size += member_res.size;
			res.alignment = std::max(res.alignment, member_res.alignment);
		}
		res.size = (res.size + res.alignment - 1) & ~(res.alignment - 1);
	}
	else if (builder().isPointerType(type_id))
	{
		res.size = sizeof(uint64_t);
		res.alignment = sizeof(uint64_t);
	}
	return res;
}

void Converter::Impl::decorate_physical_offsets(spv::Id struct_type_id, const Vector<spv::Id> &type_ids)
{
	uint32_t offset = 0;
	int member_index = 0;
	for (auto &type_id : type_ids)
	{
		// DXIL seems to imply scalar alignment for node payloads.
		// It's simple and easy, so just roll with that.
		auto size_alignment = get_physical_size_for_type(type_id);
		assert(size_alignment.size != 0);
		offset = (offset + size_alignment.alignment - 1) & ~(size_alignment.alignment - 1);
		builder().addMemberDecoration(struct_type_id, member_index, spv::DecorationOffset, offset);
		offset += size_alignment.size;
		member_index++;
	}
}
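// Worked example of the scalar block layout computed above, for a hypothetical
// payload struct { uint a; float3 b; double c; }:
//   a: size 4,  align 4 -> offset 0
//   b: size 12, align 4 -> offset 4   (scalar alignment, no vec4 rounding)
//   c: size 8,  align 8 -> offset 16
// The total size is then rounded up to the max member alignment (8) -> 24 bytes.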
spv::Id Converter::Impl::get_struct_type(const Vector<spv::Id> &type_ids, TypeLayoutFlags flags, const char *name)
{
	auto itr = std::find_if(cached_struct_types.begin(), cached_struct_types.end(),
	                        [&](const StructTypeEntry &entry) -> bool {
		                        if (type_ids.size() != entry.subtypes.size())
			                        return false;
		                        if (flags != entry.flags)
			                        return false;
		                        if ((!name && !entry.name.empty()) || (name && entry.name != name))
			                        return false;
		                        for (unsigned i = 0; i < type_ids.size(); i++)
			                        if (type_ids[i] != entry.subtypes[i])
				                        return false;
		                        return true;
	                        });

	if (itr == cached_struct_types.end())
	{
		StructTypeEntry entry;
		entry.subtypes = type_ids;
		entry.name = name ? name : "";

		if ((flags & TYPE_LAYOUT_BLOCK_BIT) != 0)
		{
			constexpr TypeLayoutFlags block_flags =
			    TYPE_LAYOUT_BLOCK_BIT | TYPE_LAYOUT_COHERENT_BIT | TYPE_LAYOUT_READ_ONLY_BIT;
			spv::Id struct_type_id = get_struct_type(type_ids, flags & ~block_flags, entry.name.c_str());
			entry.id = builder().makeStructType({ struct_type_id }, entry.name.c_str());
			builder().addDecoration(entry.id, spv::DecorationBlock);
			builder().addMemberDecoration(entry.id, 0, spv::DecorationOffset, 0);
			if ((flags & TYPE_LAYOUT_COHERENT_BIT) != 0 &&
			    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
				builder().addMemberDecoration(entry.id, 0, spv::DecorationCoherent);
			if ((flags & TYPE_LAYOUT_READ_ONLY_BIT) != 0)
				builder().addMemberDecoration(entry.id, 0, spv::DecorationNonWritable);
			builder().addMemberName(entry.id, 0, "data");
		}
		else
		{
			entry.id = builder().makeStructType(type_ids, entry.name.c_str());
			if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) != 0)
				decorate_physical_offsets(entry.id, type_ids);
		}

		entry.flags = flags;
		spv::Id id = entry.id;
		cached_struct_types.push_back(std::move(entry));
		return id;
	}
	else
		return itr->id;
}

spv::Id Converter::Impl::get_type_id(DXIL::ComponentType element_type, unsigned rows, unsigned cols, bool force_array)
{
	auto &builder = spirv_module.get_builder();
	spv::Id component_type;

	switch (element_type)
	{
	case DXIL::ComponentType::I1:
		// Cannot have bools in I/O interfaces, these are emitted as 32-bit integers.
		component_type = builder.makeUintType(32);
		break;

	case DXIL::ComponentType::I16:
		component_type = builder.makeIntegerType(16, true);
		break;
	case DXIL::ComponentType::U16:
		component_type = builder.makeIntegerType(16, false);
		break;
	case DXIL::ComponentType::I32:
		component_type = builder.makeIntegerType(32, true);
		break;
	case DXIL::ComponentType::U32:
		component_type = builder.makeIntegerType(32, false);
		break;
	case DXIL::ComponentType::I64:
		component_type = builder.makeIntegerType(64, true);
		break;
	case DXIL::ComponentType::U64:
		component_type = builder.makeIntegerType(64, false);
		break;
	case DXIL::ComponentType::F16:
		component_type = builder.makeFloatType(16);
		break;
	case DXIL::ComponentType::F32:
		component_type = builder.makeFloatType(32);
		break;
	case DXIL::ComponentType::F64:
		component_type = builder.makeFloatType(64);
		break;

	default:
		LOGE("Unknown component type.\n");
		return 0;
	}

	if (cols > 1)
		component_type = builder.makeVectorType(component_type, cols);
	if (rows > 1 || force_array)
		component_type = builder.makeArrayType(component_type, builder.makeUintConstant(rows), 0);
	return component_type;
}

spv::Id Converter::Impl::get_type_id(spv::Id id) const
{
	auto itr = id_to_type.find(id);
	if (itr == id_to_type.end())
		return 0;
	else
		return itr->second;
}

static bool module_is_ident(llvm::Module &module, const char *ident)
{
	auto *ident_meta = module.getNamedMetadata("llvm.ident");
	if (ident_meta)
		if (auto *arg0 = ident_meta->getOperand(0))
			if (auto *str = llvm::dyn_cast<llvm::MDString>(arg0->getOperand(0)))
				if (str->getString().find(ident) != std::string::npos)
					return true;
	return false;
}

static bool module_is_dxilconv(llvm::Module &module)
{
	return module_is_ident(module, "dxbc2dxil");
}

static bool module_is_dxbc_spirv(llvm::Module &module)
{
	return module_is_ident(module, "dxbc-spirv");
}

bool Converter::Impl::emit_patch_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &patch_variables = signature_node->getOperand(2);
	if (!patch_variables)
		return true;

	// There are no control points, and there's no explicit parameter, so force 0.
	if (patch_location_offset == ~0u)
		patch_location_offset = 0;

	// dxilconv is broken and emits the patch constant fork phase in a way that is nonsensical.
	// It assumes that you can write outside the bounds of a signature element.
	// To make this work, we need to lower the patch constant variables to Private variables instead.
	bool broken_patch_variables = false;
	if (execution_model == spv::ExecutionModelTessellationControl)
		broken_patch_variables = module_is_dxilconv(bitcode_parser.get_module());

	auto *patch_node = llvm::dyn_cast<llvm::MDNode>(patch_variables);
	auto &builder = spirv_module.get_builder();

	spv::StorageClass storage = execution_model == spv::ExecutionModelTessellationEvaluation ?
	                            spv::StorageClassInput : spv::StorageClassOutput;

	unsigned num_broken_user_rows = 0;

	for (unsigned i = 0; i < patch_node->getNumOperands(); i++)
	{
		auto *patch = llvm::cast<llvm::MDNode>(patch_node->getOperand(i));
		auto element_id = get_constant_metadata(patch, 0);
		auto semantic_name = get_string_metadata(patch, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(patch, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(patch, 3));

		unsigned semantic_index = 0;
		if (patch->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(patch->getOperand(4)), 0);

		auto rows = get_constant_metadata(patch, 6);
		auto cols = get_constant_metadata(patch, 7);
		auto start_row = get_constant_metadata(patch, 8);
		auto start_col = get_constant_metadata(patch, 9);

		if (system_value == DXIL::Semantic::TessFactor)
			rows = 4;
		else if (system_value == DXIL::Semantic::InsideTessFactor)
			rows = 2;

		if (broken_patch_variables && system_value == DXIL::Semantic::User)
			num_broken_user_rows = std::max(num_broken_user_rows, start_row + rows);

		auto &meta = patch_elements_meta[element_id];
		meta.semantic = system_value;

		// Handle the case where a shader declares the tess factors twice at different offsets.
		unsigned semantic_offset = 0;
		if (system_value == DXIL::Semantic::TessFactor || system_value == DXIL::Semantic::InsideTessFactor)
		{
			auto builtin = system_value == DXIL::Semantic::TessFactor ?
			               spv::BuiltInTessLevelOuter : spv::BuiltInTessLevelInner;
			if (spirv_module.has_builtin_shader_input(builtin))
			{
				meta = {};
				meta.id = spirv_module.get_builtin_shader_input(builtin);
				meta.component_type = actual_element_type;
				meta.semantic_offset = start_row;
				meta.semantic = system_value;
				continue;
			}
		}

		// The application can emit these in ViewInstancing, in which case it's just an offset.
		if (options.multiview.enable && execution_model == spv::ExecutionModelMeshEXT)
		{
			if (system_value == DXIL::Semantic::RenderTargetArrayIndex)
				multiview.custom_layer_index = true;
			if (system_value == DXIL::Semantic::ViewPortArrayIndex)
				multiview.custom_viewport_index = true;
		}

		spv::Id type_id;
		if (system_value == DXIL::Semantic::CullPrimitive)
			type_id = builder.makeBoolType();
		else
			type_id = get_type_id(effective_element_type, rows, cols);

		if (execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_primitive, false), 0);
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(storage, type_id, variable_name.c_str());
		meta.id = variable_id;
		meta.component_type = actual_element_type;
		meta.semantic_offset = semantic_offset;
		meta.start_row = start_row;
		meta.start_col = start_col;
		meta.lowering = broken_patch_variables && system_value == DXIL::Semantic::User;

		if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, storage);
		}
		else
		{
			// Patch constants are packed together with control point variables,
			// so we need to apply an offset to make this work in SPIR-V.
			// The offset is deduced from the control point I/O signature.
			// TODO: If it's possible to omit trailing CP members in a domain shader, we will need
			// to pass this offset into the compiler.
			VulkanStageIO vk_io = { start_row + patch_location_offset, start_col, true };
			if (resource_mapping_iface)
			{
				D3DStageIO d3d_io = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (execution_model == spv::ExecutionModelTessellationEvaluation)
				{
					if (!resource_mapping_iface->remap_stage_input(d3d_io, vk_io))
						return false;
				}
				else if (!resource_mapping_iface->remap_stage_output(d3d_io, vk_io))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_io.location);
			if (vk_io.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_io.component);
		}

		builder.addDecoration(variable_id, execution_model == spv::ExecutionModelMeshEXT ?
		                                   spv::DecorationPerPrimitiveEXT : spv::DecorationPatch);
	}

	if (num_broken_user_rows)
	{
		spv::Id type_id = builder.makeArrayType(builder.makeVectorType(builder.makeUintType(32), 4),
		                                        builder.makeUintConstant(num_broken_user_rows), 0);
		execution_mode_meta.patch_lowering_array_var_id =
		    create_variable_with_initializer(spv::StorageClassPrivate, type_id,
		                                     builder.makeNullConstant(type_id), "PatchLoweringRows");
	}

	return true;
}

bool Converter::Impl::emit_other_variables()
{
	auto &builder = spirv_module.get_builder();

	if (execution_model == spv::ExecutionModelMeshEXT && execution_mode_meta.stage_output_num_primitive)
	{
		unsigned index_dim = execution_mode_meta.primitive_index_dimension;
		if (index_dim)
		{
			spv::Id type_id = builder.makeArrayType(
			    get_type_id(DXIL::ComponentType::U32, 1, index_dim),
			    builder.makeUintConstant(execution_mode_meta.stage_output_num_primitive, false), 0);
			primitive_index_array_id = create_variable(spv::StorageClassOutput, type_id, "indices");
			spv::BuiltIn builtin_id = index_dim == 3 ?
			                          spv::BuiltInPrimitiveTriangleIndicesEXT :
			                          spv::BuiltInPrimitiveLineIndicesEXT;
			builder.addDecoration(primitive_index_array_id, spv::DecorationBuiltIn, builtin_id);
			spirv_module.register_builtin_shader_output(primitive_index_array_id, builtin_id);
		}
	}

	return true;
}

static unsigned get_geometry_shader_stream_index(const llvm::MDNode *node)
{
	if (node->getNumOperands() >= 11 && node->getOperand(10))
	{
		auto *attr = llvm::dyn_cast<llvm::MDNode>(node->getOperand(10));
		if (!attr)
			return 0;

		unsigned num_pairs = attr->getNumOperands() / 2;
		for (unsigned i = 0; i < num_pairs; i++)
		{
			if (static_cast<DXIL::GSStageOutTags>(get_constant_metadata(attr, 2 * i + 0)) ==
			    DXIL::GSStageOutTags::Stream)
				return get_constant_metadata(attr, 2 * i + 1);
		}
	}

	return 0;
}

static void build_geometry_stream_row_offsets(unsigned offsets[4], const llvm::MDNode *outputs_node)
{
	unsigned row_count_for_geometry_stream[4] = {};
	for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
	{
		auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
		unsigned geometry_stream = get_geometry_shader_stream_index(output);
		if (geometry_stream < 4)
		{
			auto start_row = get_constant_metadata(output, 8);
			auto rows = get_constant_metadata(output, 6);
			auto end_rows = rows + start_row;
			if (end_rows > row_count_for_geometry_stream[geometry_stream])
				row_count_for_geometry_stream[geometry_stream] = end_rows;
		}
	}

	// Prefix sum: each stream starts after all rows consumed by lower-numbered streams.
	for (unsigned row = 0; row < 4; row++)
		for (unsigned i = 0; i < row; i++)
			offsets[row] += row_count_for_geometry_stream[i];
}
bool Converter::Impl::emit_stage_output_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &outputs = signature_node->getOperand(1);
	if (!outputs)
		return true;

	auto *outputs_node = llvm::dyn_cast<llvm::MDNode>(outputs);
	auto &builder = spirv_module.get_builder();

	unsigned clip_distance_count = 0;
	unsigned cull_distance_count = 0;

	bool auto_patch_location = patch_location_offset == ~0u &&
	                           (execution_model == spv::ExecutionModelTessellationControl ||
	                            execution_model == spv::ExecutionModelMeshEXT);
	if (auto_patch_location)
		patch_location_offset = 0;

	// If we have multiple geometry streams, we need to hallucinate locations.
	// This is okay since we're not going to support multi-stream rasterization anyway.
	unsigned start_row_for_geometry_stream[4] = {};
	if (execution_model == spv::ExecutionModelGeometry)
		build_geometry_stream_row_offsets(start_row_for_geometry_stream, outputs_node);

	for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
	{
		auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
		auto element_id = get_constant_metadata(output, 0);
		auto semantic_name = get_string_metadata(output, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(output, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(output, 3));

		unsigned semantic_index = 0;
		if (output->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(output->getOperand(4)), 0);

		auto interpolation = static_cast<DXIL::InterpolationMode>(get_constant_metadata(output, 5));
		auto rows = get_constant_metadata(output, 6);
		auto cols = get_constant_metadata(output, 7);
		auto start_row = get_constant_metadata(output, 8);
		auto start_col = get_constant_metadata(output, 9);
		bool masked_output = false;

		if (options.dual_source_blending && start_row >= 2)
		{
			// Mask out writes to unused higher RTs when using dual source blending.
			continue;
		}

		if (auto_patch_location)
			patch_location_offset = std::max(patch_location_offset, start_row + rows);

		spv::Id type_id = get_type_id(effective_element_type, rows, cols);

		if (options.quirks.ignore_primitive_shading_rate && system_value == DXIL::Semantic::ShadingRate)
		{
			masked_output = true;
		}
		else if (execution_model == spv::ExecutionModelTessellationControl ||
		         (execution_model == spv::ExecutionModelTessellationEvaluation &&
		          system_value == DXIL::Semantic::ShadingRate))
		{
			// For HS <-> DS, ignore system values.
			// Shading rate is also ignored in DS. RE4 hits this case. Just treat it as a normal user varying.
			system_value = DXIL::Semantic::User;
		}

		if (system_value == DXIL::Semantic::Position)
		{
			type_id = get_type_id(effective_element_type, rows, 4);
		}
		else if (system_value == DXIL::Semantic::Coverage)
		{
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(1), 0);
		}
		else if (system_value == DXIL::Semantic::ClipDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			output_clip_cull_meta[element_id] = { clip_distance_count, cols, spv::BuiltInClipDistance };
			output_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			clip_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::CullDistance)
		{
			// Same story for cull distance.
			output_clip_cull_meta[element_id] = { cull_distance_count, cols, spv::BuiltInCullDistance };
			output_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			cull_distance_count += rows * cols;
			continue;
		}

		// The application can emit these in ViewInstancing, in which case it's just an offset.
		if (options.multiview.enable)
		{
			if (system_value == DXIL::Semantic::RenderTargetArrayIndex)
				multiview.custom_layer_index = true;
			if (system_value == DXIL::Semantic::ViewPortArrayIndex)
				multiview.custom_viewport_index = true;
		}

		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(
		    masked_output ? spv::StorageClassPrivate : spv::StorageClassOutput, type_id, variable_name.c_str());
		output_elements_meta[element_id] = { variable_id, actual_element_type, 0, system_value };

		if (effective_element_type != actual_element_type && component_type_is_16bit(actual_element_type))
			builder.addDecoration(variable_id, spv::DecorationRelaxedPrecision);

		if (execution_model == spv::ExecutionModelVertex || execution_model == spv::ExecutionModelGeometry ||
		    execution_model == spv::ExecutionModelTessellationEvaluation)
		{
			if (resource_mapping_iface)
			{
				VulkanStreamOutput vk_output = {};
				if (!resource_mapping_iface->remap_stream_output({ semantic_name.c_str(), semantic_index },
				                                                 vk_output))
					return false;

				if (vk_output.enable)
				{
					builder.addCapability(spv::CapabilityTransformFeedback);
					builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeXfb);
					builder.addDecoration(variable_id, spv::DecorationOffset, vk_output.offset);
					builder.addDecoration(variable_id, spv::DecorationXfbStride, vk_output.stride);
					builder.addDecoration(variable_id, spv::DecorationXfbBuffer, vk_output.buffer_index);
				}
			}
		}

		unsigned geometry_stream = 0;
		if (execution_model == spv::ExecutionModelGeometry)
		{
			geometry_stream = get_geometry_shader_stream_index(output);
			if (geometry_stream != 0)
			{
				builder.addCapability(spv::CapabilityGeometryStreams);
				builder.addDecoration(variable_id, spv::DecorationStream, geometry_stream);
			}
		}

		if (system_value == DXIL::Semantic::Target)
		{
			if (options.dual_source_blending)
			{
				assert(start_row == 0 || start_row == 1);
				if (rows != 1)
				{
					LOGE("For dual source blending, number of rows must be 1.\n");
					return false;
				}
				builder.addDecoration(variable_id, spv::DecorationLocation, 0);
				builder.addDecoration(variable_id, spv::DecorationIndex, start_row);
				output_elements_meta[element_id].semantic_offset = 0;
			}
			else
			{
				builder.addDecoration(variable_id, spv::DecorationLocation, start_row);
				output_elements_meta[element_id].semantic_offset = start_row;
			}

			if (start_col != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, start_col);
		}
		else if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, spv::StorageClassOutput);
		}
		else
		{
			if (execution_model == spv::ExecutionModelVertex ||
			    execution_model == spv::ExecutionModelTessellationEvaluation ||
			    execution_model == spv::ExecutionModelGeometry ||
			    execution_model == spv::ExecutionModelMeshEXT)
			{
				emit_interpolation_decorations(variable_id, interpolation);
			}

			VulkanStageIO vk_output = { start_row, start_col };
			if (execution_model == spv::ExecutionModelGeometry && geometry_stream < 4)
				vk_output.location += start_row_for_geometry_stream[geometry_stream];

			if (resource_mapping_iface)
			{
				D3DStageIO d3d_output = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (!resource_mapping_iface->remap_stage_output(d3d_output, vk_output))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_output.location);
			if (vk_output.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_output.component);
		}
	}

	if (clip_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, clip_distance_count, 1, true);
		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassOutput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::ClipDistance, spv::StorageClassOutput);
		spirv_module.register_builtin_shader_output(variable_id, spv::BuiltInClipDistance);
	}

	if (cull_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, cull_distance_count, 1, true);
		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassOutput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::CullDistance, spv::StorageClassOutput);
		spirv_module.register_builtin_shader_output(variable_id, spv::BuiltInCullDistance);
	}

	return true;
}
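// Example of the clip/cull packing above (hypothetical signature): declaring
// SV_ClipDistance0 as float2 and SV_ClipDistance1 as float gives clip_distance_count = 3;
// both elements are later stored through one ClipDistance[3] builtin array using the
// offsets recorded in output_clip_cull_meta (0 and 2 respectively).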
		if (mode == DXIL::InterpolationMode::LinearNoperspective)
			mode = DXIL::InterpolationMode::Linear;
		else if (mode == DXIL::InterpolationMode::LinearNoperspectiveCentroid)
			mode = DXIL::InterpolationMode::LinearCentroid;
		else if (mode == DXIL::InterpolationMode::LinearNoperspectiveSample)
			mode = DXIL::InterpolationMode::LinearSample;
		emit_interpolation_decorations(variable_id, mode);
		break;

	default:
		break;
	}
}

void Converter::Impl::emit_interpolation_decorations(spv::Id variable_id, DXIL::InterpolationMode mode)
{
	auto &builder = spirv_module.get_builder();
	switch (mode)
	{
	case DXIL::InterpolationMode::Constant:
		builder.addDecoration(variable_id, spv::DecorationFlat);
		break;

	case DXIL::InterpolationMode::LinearCentroid:
		builder.addDecoration(variable_id, spv::DecorationCentroid);
		break;

	case DXIL::InterpolationMode::LinearSample:
		builder.addDecoration(variable_id, spv::DecorationSample);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		break;

	case DXIL::InterpolationMode::LinearNoperspective:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		break;

	case DXIL::InterpolationMode::LinearNoperspectiveCentroid:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		builder.addDecoration(variable_id, spv::DecorationCentroid);
		break;

	case DXIL::InterpolationMode::LinearNoperspectiveSample:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		builder.addDecoration(variable_id, spv::DecorationSample);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		break;

	default:
		break;
	}
}

void Converter::Impl::emit_builtin_decoration(spv::Id id, DXIL::Semantic semantic, spv::StorageClass storage)
{
	auto &builder = spirv_module.get_builder();
	bool requires_flat_input = false;

	switch (semantic)
	{
	case DXIL::Semantic::Position:
		if (execution_model == spv::ExecutionModelFragment)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragCoord);
			spirv_module.register_builtin_shader_input(id, spv::BuiltInFragCoord);
		}
		else if (storage == spv::StorageClassInput)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPosition);
			spirv_module.register_builtin_shader_input(id, spv::BuiltInPosition);
		}
		else if (storage == spv::StorageClassOutput)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPosition);
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPosition);
			if (options.invariant_position)
				builder.addDecoration(id, spv::DecorationInvariant);
		}
		break;

	case DXIL::Semantic::SampleIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInSampleId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInSampleId);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		requires_flat_input = true;
		break;

	case DXIL::Semantic::VertexID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInVertexIndex);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInVertexIndex);
		break;

	case DXIL::Semantic::InstanceID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInInstanceIndex);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInInstanceIndex);
		break;

	case DXIL::Semantic::InsideTessFactor:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessLevelInner);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessLevelInner);
		break;

	case DXIL::Semantic::TessFactor:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessLevelOuter);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessLevelOuter);
		break;

	case DXIL::Semantic::Coverage:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInSampleMask);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInSampleMask);
		break;

	case DXIL::Semantic::Depth:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::StencilRef:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragStencilRefEXT);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeStencilRefReplacingEXT);
		builder.addExtension("SPV_EXT_shader_stencil_export");
		builder.addCapability(spv::CapabilityStencilExportEXT);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragStencilRefEXT);
		break;

	case DXIL::Semantic::DepthLessEqual:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthLess);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::DepthGreaterEqual:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthGreater);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::IsFrontFace:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFrontFacing);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInFrontFacing);
		break;

	case DXIL::Semantic::ClipDistance:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInClipDistance);
		builder.addCapability(spv::CapabilityClipDistance);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInClipDistance);
		else if (storage == spv::StorageClassInput)
			spirv_module.register_builtin_shader_input(id, spv::BuiltInClipDistance);
		break;

	case DXIL::Semantic::CullDistance:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInCullDistance);
		builder.addCapability(spv::CapabilityCullDistance);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInCullDistance);
		else if (storage == spv::StorageClassInput)
			spirv_module.register_builtin_shader_input(id, spv::BuiltInCullDistance);
		break;

	case DXIL::Semantic::RenderTargetArrayIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInLayer);
		if (storage == spv::StorageClassOutput)
		{
			spirv_module.register_builtin_shader_output(id, spv::BuiltInLayer);
			if (execution_model != spv::ExecutionModelGeometry)
			{
				builder.addExtension("SPV_EXT_shader_viewport_index_layer");
				builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
			}
		}
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInLayer);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityGeometry);
		break;

	case DXIL::Semantic::ViewPortArrayIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInViewportIndex);
		if (storage == spv::StorageClassOutput)
		{
			spirv_module.register_builtin_shader_output(id, spv::BuiltInViewportIndex);
			if (execution_model != spv::ExecutionModelGeometry)
			{
				builder.addExtension("SPV_EXT_shader_viewport_index_layer");
				builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
			}
		}
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInViewportIndex);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityMultiViewport);
		break;

	case DXIL::Semantic::PrimitiveID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPrimitiveId);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPrimitiveId);
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInPrimitiveId);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityGeometry);
		break;

	case DXIL::Semantic::ShadingRate:
		if (storage == spv::StorageClassOutput)
		{
			if (!options.quirks.ignore_primitive_shading_rate)
				builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPrimitiveShadingRateKHR);
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPrimitiveShadingRateKHR);
		}
		else
		{
			if (!options.quirks.ignore_primitive_shading_rate)
			{
				builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInShadingRateKHR);
				requires_flat_input = true;
			}
			spirv_module.register_builtin_shader_input(id, spv::BuiltInShadingRateKHR);
		}
		builder.addExtension("SPV_KHR_fragment_shading_rate");
		builder.addCapability(spv::CapabilityFragmentShadingRateKHR);
		break;

	case DXIL::Semantic::Barycentrics:
	case DXIL::Semantic::InternalBarycentricsNoPerspective:
	{
		if (options.khr_barycentrics_enabled)
		{
			auto builtin = semantic == DXIL::Semantic::Barycentrics ?
			               spv::BuiltInBaryCoordKHR : spv::BuiltInBaryCoordNoPerspKHR;
			builder.addExtension("SPV_KHR_fragment_shader_barycentric");
			builder.addCapability(spv::CapabilityFragmentBarycentricKHR);
			builder.addDecoration(id, spv::DecorationBuiltIn, builtin);
			spirv_module.register_builtin_shader_input(id, builtin);
		}
		else
		{
			// TODO: We're not dealing with centroid vs per-sample decorations here.
			auto builtin = semantic == DXIL::Semantic::Barycentrics ?
			               spv::BuiltInBaryCoordSmoothAMD : spv::BuiltInBaryCoordNoPerspAMD;
			builder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
			builder.addDecoration(id, spv::DecorationBuiltIn, builtin);
			spirv_module.register_builtin_shader_input(id, builtin);
		}
		break;
	}

	case DXIL::Semantic::CullPrimitive:
	{
		builder.addExtension("SPV_EXT_mesh_shader");
		builder.addCapability(spv::CapabilityMeshShadingEXT);
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInCullPrimitiveEXT);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInCullPrimitiveEXT);
		break;
	}

	case DXIL::Semantic::DomainLocation:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic,
		// and it's easier to just treat it like a normal builtin input.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessCoord);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessCoord);
		break;

	case DXIL::Semantic::DispatchThreadID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInGlobalInvocationId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInGlobalInvocationId);
		break;

	case DXIL::Semantic::GroupThreadID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInLocalInvocationId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInLocalInvocationId);
		break;

	case DXIL::Semantic::GroupID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInWorkgroupId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInWorkgroupId);
		break;

	default:
		LOGE("Unknown DXIL semantic.\n");
		break;
	}

	// VUID-StandaloneSpirv-Flat-04744
	if (requires_flat_input && execution_model == spv::ExecutionModelFragment)
		builder.addDecoration(id, spv::DecorationFlat);
}

static bool execution_model_has_incoming_payload(spv::ExecutionModel model)
{
	return model != spv::ExecutionModelRayGenerationKHR && execution_model_is_ray_tracing(model);
}

static bool execution_model_has_hit_attribute(spv::ExecutionModel model)
{
	switch (model)
	{
	case spv::ExecutionModelAnyHitKHR:
	case spv::ExecutionModelClosestHitKHR:
	case spv::ExecutionModelIntersectionKHR:
		return true;

	default:
		return false;
	}
}

bool Converter::Impl::emit_incoming_payload()
{
	auto *func = get_entry_point_function(entry_point_meta);

	// The first argument to a RT entry point is always a pointer to payload.
	if (func->arg_end() - func->arg_begin() >= 1)
	{
		auto &arg = *func->arg_begin();
		if (!llvm::isa<llvm::PointerType>(arg.getType()))
			return false;
		auto *elem_type = arg.getType()->getPointerElementType();

		spv::StorageClass storage;
		if (execution_model == spv::ExecutionModelCallableKHR)
			storage = spv::StorageClassIncomingCallableDataKHR;
		else
			storage = spv::StorageClassIncomingRayPayloadKHR;

		// This is a POD. We'll emit that as a block containing the payload type.
		spv::Id payload_var = create_variable(storage, get_type_id(elem_type), "payload");
		handle_to_storage_class[&arg] = storage;
		rewrite_value(&arg, payload_var);
	}

	return true;
}

bool Converter::Impl::emit_hit_attribute()
{
	auto *func = get_entry_point_function(entry_point_meta);

	// The second argument to a RT entry point is always a pointer to hit attribute.
	if (func->arg_end() - func->arg_begin() >= 2)
	{
		auto args = func->arg_begin();
		++args;
		auto &arg = *args;
		if (!llvm::isa<llvm::PointerType>(arg.getType()))
			return false;
		auto *elem_type = arg.getType()->getPointerElementType();

		spv::Id hit_attribute_var = create_variable(spv::StorageClassHitAttributeKHR, get_type_id(elem_type), "hit");
		handle_to_storage_class[&arg] = spv::StorageClassHitAttributeKHR;
		rewrite_value(&arg, hit_attribute_var);
	}
	else if (execution_model == spv::ExecutionModelIntersectionKHR && llvm_hit_attribute_output_type)
	{
		auto *elem_type = llvm_hit_attribute_output_type->getPointerElementType();
		llvm_hit_attribute_output_value = create_variable(spv::StorageClassHitAttributeKHR, get_type_id(elem_type), "hit");
	}

	return true;
}

bool Converter::Impl::emit_global_variables()
{
	auto &module = bitcode_parser.get_module();

	if (execution_model_has_incoming_payload(execution_model))
		if (!emit_incoming_payload())
			return false;

	if (execution_model_has_hit_attribute(execution_model))
		if (!emit_hit_attribute())
			return false;

	for (auto itr = module.global_begin(); itr != module.global_end(); ++itr)
	{
		llvm::GlobalVariable &global = *itr;
		auto address_space = static_cast<DXIL::AddressSpace>(global.getType()->getAddressSpace());

		// Workarounds for DXR. RT resources tend to be declared with external linkage + structs.
		// Groupshared is also declared with external linkage, even if that is bogus.
		// Make sure we declare global internal struct LUTs at the very least ...
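		// In other words (summary of the checks below): only internal-linkage globals and
		// groupshared variables survive to become SPIR-V variables; other external-linkage
		// globals are assumed to be resource declarations and are skipped.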
		if (global.getLinkage() == llvm::GlobalVariable::ExternalLinkage &&
		    address_space != DXIL::AddressSpace::GroupShared)
		{
			continue;
		}

		// Ignore @llvm.global_ctors(). Only observed once with dummy ctor.
		// It probably is not intended to work.
		if (global.getLinkage() == llvm::GlobalVariable::AppendingLinkage)
			continue;

		spv::Id pointee_type_id = 0;
		spv::Id scalar_type_id = 0;
		bool padded_composite = false;
		bool complex_composite = false;

		if (address_space == DXIL::AddressSpace::Thread &&
		    options.extended_robustness.constant_lut &&
		    global.hasInitializer() && global.isConstant())
		{
			if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(global.getType()->getPointerElementType()))
			{
				scalar_type_id = get_type_id(array_type->getArrayElementType());
				pointee_type_id = builder().makeArrayType(
				    scalar_type_id, builder().makeUintConstant(array_type->getArrayNumElements() + 1), false);
				padded_composite = true;
			}
		}
		else if (address_space == DXIL::AddressSpace::GroupShared && shader_analysis.require_wmma)
		{
			// Workaround for bugged WMMA shaders.
			// The shaders rely on AMD aligning LDS size to 512 bytes.
			// This avoids overflow spilling into LDSTranspose area by mistake, which breaks some shaders.
			if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(global.getType()->getPointerElementType()))
			{
				scalar_type_id = get_type_id(array_type->getArrayElementType());
				uint32_t elem_count = array_type->getArrayNumElements();
				uint32_t alignment = (512 * 8) / array_type->getArrayElementType()->getIntegerBitWidth();
				elem_count = (elem_count + alignment - 1) & ~(alignment - 1);
				pointee_type_id = builder().makeArrayType(
				    scalar_type_id, builder().makeUintConstant(elem_count), false);
			}
		}
		else if (shader_analysis.require_wmma)
		{
			if (ags_alloca_or_global_filter(*this, &global, pointee_type_id))
				complex_composite = true;
		}

		if (!pointee_type_id)
			pointee_type_id = get_type_id(global.getType()->getPointerElementType());

		// Happens for some global variables in DXR for some reason, benign.
		if (pointee_type_id == 0)
			continue;

		spv::Id initializer_id = 0;
		llvm::Constant *initializer = nullptr;
		if (global.hasInitializer())
			initializer = global.getInitializer();
		if (initializer && llvm::isa<llvm::UndefValue>(initializer))
			initializer = nullptr;

		if (address_space == DXIL::AddressSpace::GroupShared)
		{
			if (initializer)
			{
				// FIXME: Is this even legal DXIL?
				LOGW("Global variable address space cannot have initializer! Ignoring ...\n");
				initializer = nullptr;
			}
		}

		if (initializer)
		{
			if (complex_composite)
			{
				if (!llvm::isa<llvm::ConstantAggregateZero>(initializer))
				{
					LOGE("WMMA initializer must be all zero.\n");
					return false;
				}
				initializer_id = builder().makeNullConstant(pointee_type_id);
			}
			else if (padded_composite)
				initializer_id = get_padded_constant_array(pointee_type_id, initializer);
			else
				initializer_id = get_id_for_constant(initializer, 0);
		}

		spv::StorageClass storage_class = address_space == DXIL::AddressSpace::GroupShared ?
		                                  spv::StorageClassWorkgroup : spv::StorageClassPrivate;
		spv::Id var_id = create_variable_with_initializer(
		    get_effective_storage_class(&global, storage_class), pointee_type_id, initializer_id);

		decorate_relaxed_precision(global.getType()->getPointerElementType(), var_id, false);
		rewrite_value(&global, var_id);
	}

	return true;
}

static void adjust_system_value(DXIL::Semantic &semantic, DXIL::InterpolationMode &interpolation)
{
	if (semantic == DXIL::Semantic::Barycentrics)
	{
		switch (interpolation)
		{
		case DXIL::InterpolationMode::LinearNoperspective:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::Linear;
			break;

		case DXIL::InterpolationMode::LinearNoperspectiveCentroid:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::LinearCentroid;
			break;

		case DXIL::InterpolationMode::LinearNoperspectiveSample:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::LinearSample;
			break;

		default:
			break;
		}
	}
}

bool Converter::Impl::emit_stage_input_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &inputs = signature_node->getOperand(0);
	if (!inputs)
		return true;

	bool stage_arrayed_inputs =
	    execution_model == spv::ExecutionModelGeometry ||
	    execution_model == spv::ExecutionModelTessellationControl ||
	    execution_model == spv::ExecutionModelTessellationEvaluation;

	uint32_t stage_input_vertices = execution_mode_meta.stage_input_num_vertex;
	if (execution_model == spv::ExecutionModelTessellationControl)
	{
		// The control point input arrays are effectively unsized. We have to give it something, so use upper bound.
		constexpr uint32_t MaxControlPoints = 32;
		stage_input_vertices = MaxControlPoints;
	}

	auto *inputs_node = llvm::dyn_cast<llvm::MDNode>(inputs);
	auto &builder = spirv_module.get_builder();

	unsigned clip_distance_count = 0;
	unsigned cull_distance_count = 0;

	bool auto_patch_location = patch_location_offset == ~0u &&
	                           execution_model == spv::ExecutionModelTessellationEvaluation;
	if (auto_patch_location)
		patch_location_offset = 0;

	for (unsigned i = 0; i < inputs_node->getNumOperands(); i++)
	{
		bool arrayed_input = stage_arrayed_inputs;
		auto *input = llvm::cast<llvm::MDNode>(inputs_node->getOperand(i));
		auto element_id = get_constant_metadata(input, 0);
		auto semantic_name = get_string_metadata(input, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(input, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(input, 3));

		unsigned semantic_index = 0;
		if (input->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(input->getOperand(4)), 0);

		auto interpolation = static_cast<DXIL::InterpolationMode>(get_constant_metadata(input, 5));
		adjust_system_value(system_value, interpolation);

		auto rows = get_constant_metadata(input, 6);
		auto cols = get_constant_metadata(input, 7);
		auto start_row = get_constant_metadata(input, 8);
		auto start_col = get_constant_metadata(input, 9);

		if (auto_patch_location)
			patch_location_offset = std::max(patch_location_offset, start_row + rows);

		// For HS <-> DS, ignore system values.
		// Allow certain system values that are synthesized however.
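		// E.g. SV_Position written by HS arrives in DS as a plain Location-decorated input;
		// only SV_DomainLocation is kept special, since it maps to BuiltInTessCoord.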
		if (execution_model == spv::ExecutionModelTessellationEvaluation &&
		    system_value != DXIL::Semantic::DomainLocation)
			system_value = DXIL::Semantic::User;

		bool masked_input = false;
		if (system_value == DXIL::Semantic::ShadingRate && options.quirks.ignore_primitive_shading_rate)
			masked_input = true;

		if (!options.khr_barycentrics_enabled)
		{
			if (system_value == DXIL::Semantic::Barycentrics ||
			    system_value == DXIL::Semantic::InternalBarycentricsNoPerspective)
			{
				cols = 2;
			}
		}

		spv::Id type_id = get_type_id(effective_element_type, rows, cols);

		if (system_value == DXIL::Semantic::Position)
		{
			type_id = get_type_id(effective_element_type, rows, 4);
		}
		else if (system_value == DXIL::Semantic::IsFrontFace)
		{
			// Need to cast this to uint when loading the semantic input.
			type_id = builder.makeBoolType();
		}
		else if (system_value == DXIL::Semantic::ClipDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			input_clip_cull_meta[element_id] = { clip_distance_count, cols, spv::BuiltInClipDistance };
			input_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			clip_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::CullDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			input_clip_cull_meta[element_id] = { cull_distance_count, cols, spv::BuiltInCullDistance };
			input_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			cull_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::PrimitiveID ||
		         system_value == DXIL::Semantic::ShadingRate ||
		         system_value == DXIL::Semantic::DomainLocation)
		{
			arrayed_input = false;
		}

		bool per_vertex = llvm_attribute_at_vertex_indices.count(element_id) != 0;

		if (arrayed_input)
		{
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(stage_input_vertices), 0);
		}
		else if (per_vertex && options.khr_barycentrics_enabled)
		{
			// TODO: Does this change for barycentrics with lines?
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(3), 0);
			// Default. We should emit PerVertex instead of flat. Linear here is the default, don't emit anything.
			interpolation = DXIL::InterpolationMode::Linear;
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(masked_input ? spv::StorageClassPrivate : spv::StorageClassInput,
		                                      type_id, variable_name.c_str());
		input_elements_meta[element_id] = { variable_id, actual_element_type,
		                                    system_value != DXIL::Semantic::User ? start_row : 0, system_value };

		if (per_vertex)
		{
			if (options.khr_barycentrics_enabled)
			{
				builder.addExtension("SPV_KHR_fragment_shader_barycentric");
				builder.addCapability(spv::CapabilityFragmentBarycentricKHR);
				builder.addDecoration(variable_id, spv::DecorationPerVertexKHR);
			}
			else
			{
				builder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
				builder.addDecoration(variable_id, spv::DecorationExplicitInterpAMD);
			}
		}

		if (effective_element_type != actual_element_type && component_type_is_16bit(actual_element_type))
			builder.addDecoration(variable_id, spv::DecorationRelaxedPrecision);

		if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, spv::StorageClassInput);
			if (execution_model == spv::ExecutionModelFragment)
				emit_builtin_interpolation_decorations(variable_id, system_value, interpolation);
		}
		else
		{
			if (execution_model == spv::ExecutionModelFragment)
				emit_interpolation_decorations(variable_id, interpolation);

			VulkanStageIO vk_input = { start_row, start_col };
			if (resource_mapping_iface)
			{
				D3DStageIO d3d_input = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (execution_model == spv::ExecutionModelVertex)
				{
					if (!resource_mapping_iface->remap_vertex_input(d3d_input, vk_input))
						return false;
				}
				if (!resource_mapping_iface->remap_stage_input(d3d_input, vk_input))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_input.location);
			if (execution_model != spv::ExecutionModelVertex && vk_input.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_input.component);

			if (execution_model == spv::ExecutionModelFragment && (vk_input.flags & STAGE_IO_PER_PRIMITIVE))
			{
				builder.addDecoration(variable_id, spv::DecorationPerPrimitiveEXT);
				builder.addExtension("SPV_EXT_mesh_shader");
				builder.addCapability(spv::CapabilityMeshShadingEXT);
			}
		}
	}

	if (clip_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, clip_distance_count, 1, true);
		if (stage_arrayed_inputs)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(stage_input_vertices, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassInput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::ClipDistance, spv::StorageClassInput);
		spirv_module.register_builtin_shader_input(variable_id, spv::BuiltInClipDistance);
	}

	if (cull_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, cull_distance_count, 1, true);
		if (stage_arrayed_inputs)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(stage_input_vertices, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassInput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::CullDistance, spv::StorageClassInput);
		spirv_module.register_builtin_shader_input(variable_id, spv::BuiltInCullDistance);
	}

	return true;
}

spv::Id Converter::Impl::build_sampled_image(spv::Id image_id, spv::Id sampler_id, bool comparison)
{
	bool is_non_uniform = handle_to_resource_meta[image_id].non_uniform ||
	                      handle_to_resource_meta[sampler_id].non_uniform;

	auto itr = std::find_if(combined_image_sampler_cache.begin(), combined_image_sampler_cache.end(),
	                        [&](const CombinedImageSampler &combined) {
		                        return combined.image_id == image_id && combined.sampler_id == sampler_id &&
		                               combined.non_uniform == is_non_uniform;
	                        });

	if (itr != combined_image_sampler_cache.end())
		return itr->combined_id;

	auto &builder = spirv_module.get_builder();
	spv::Id image_type_id = get_type_id(image_id);
	spv::Dim dim = builder.getTypeDimensionality(image_type_id);
	bool arrayed = builder.isArrayedImageType(image_type_id);
	bool multisampled = builder.isMultisampledImageType(image_type_id);
	spv::Id sampled_format = builder.getImageComponentType(image_type_id);

	image_type_id = builder.makeImageType(sampled_format, dim, comparison, arrayed, multisampled, 1,
	                                      spv::ImageFormatUnknown);

	Operation *op = allocate(spv::OpSampledImage, builder.makeSampledImageType(image_type_id));
	op->add_ids({ image_id, sampler_id });
	add(op);

	if (is_non_uniform)
	{
		builder.addDecoration(op->id, spv::DecorationNonUniformEXT);
		op->flags |= Operation::SinkableBit;
	}

	combined_image_sampler_cache.push_back({ image_id, sampler_id, op->id, is_non_uniform });
	return op->id;
}

spv::Id Converter::Impl::build_vector_type(spv::Id element_type, unsigned count)
{
	auto &builder = spirv_module.get_builder();
	if (count == 1)
		return element_type;
	else
		return builder.makeVectorType(element_type, count);
}

spv::Id Converter::Impl::build_vector(spv::Id element_type, const spv::Id *elements, unsigned count)
{
	if (count == 1)
		return elements[0];

	auto &builder = spirv_module.get_builder();
	Operation *op = allocate(spv::OpCompositeConstruct, builder.makeVectorType(element_type, count));
	for (unsigned i = 0; i < count; i++)
		op->add_id(elements[i]);
	add(op);
	return op->id;
}

spv::Id Converter::Impl::build_constant_vector(spv::Id element_type, const spv::Id *elements, unsigned count)
{
	if (count == 1)
		return elements[0];
	auto &builder = spirv_module.get_builder();
	return builder.makeCompositeConstant(builder.makeVectorType(element_type, count),
	                                     { elements, elements + count });
}

spv::Id Converter::Impl::build_splat_constant_vector(spv::Id element_type, spv::Id value, unsigned count)
{
	spv::Id ids[4];
	for (unsigned i = 0; i < count; i++)
		ids[i] = value;
	return build_constant_vector(element_type, ids, count);
}

spv::Id Converter::Impl::build_offset(spv::Id value, unsigned offset)
{
	if (offset == 0)
		return value;

	auto &builder = spirv_module.get_builder();
	Operation *op = allocate(spv::OpIAdd, builder.makeUintType(32));
	op->add_ids({ value, builder.makeUintConstant(offset) });
	add(op);
	return op->id;
}

void Converter::Impl::repack_sparse_feedback(DXIL::ComponentType component_type, unsigned num_components,
                                             const llvm::Value *value, const llvm::Type *target_type,
                                             spv::Id override_value)
{
	auto *code_id = allocate(spv::OpCompositeExtract, builder().makeUintType(32));
	code_id->add_id(get_id_for_value(value));
	code_id->add_literal(0);
	add(code_id);

	auto effective_component_type = get_effective_typed_resource_type(component_type);

	spv::Id texel_id;
	if (override_value)
	{
		texel_id = override_value;
	}
	else
	{
		auto *texel = allocate(spv::OpCompositeExtract, get_type_id(effective_component_type, 1, num_components));
		texel->add_id(get_id_for_value(value));
		texel->add_literal(1);
		add(texel);
		texel_id = texel->id;
	}

	fixup_load_type_typed(component_type, num_components, texel_id, target_type);

	spv::Id components[5];
	if (num_components > 1)
	{
		for (unsigned i = 0; i < num_components; i++)
		{
			auto *extract_op = allocate(spv::OpCompositeExtract, get_type_id(component_type, 1, 1));
			extract_op->add_id(texel_id);
			extract_op->add_literal(i);
			add(extract_op);
			components[i] = extract_op->id;
		}
	}
	else
	{
		for (auto &comp : components)
			comp = texel_id;
		num_components = 4;
	}
	components[num_components] = code_id->id;

	auto *repack_op = allocate(spv::OpCompositeConstruct, get_type_id(value->getType()));
	for (auto &comp : components)
		repack_op->add_id(comp);
	add(repack_op);
	rewrite_value(value, repack_op->id);
}

bool Converter::Impl::support_native_fp16_operations() const
{
	return execution_mode_meta.native_16bit_operations || options.min_precision_prefer_native_16bit;
}

spv::Id Converter::Impl::build_value_cast(spv::Id value_id, DXIL::ComponentType input_type,
                                          DXIL::ComponentType output_type, unsigned components)
{
	// This path only hits for bitcasts or 16-bit <-> 32-bit casts.
	bool output_16bit = component_type_is_16bit(output_type);
	bool input_16bit = component_type_is_16bit(input_type);
	spv::Op opcode = spv::OpBitcast;

	if (output_16bit != input_16bit)
	{
		switch (input_type)
		{
		case DXIL::ComponentType::F16:
		case DXIL::ComponentType::F32:
			opcode = spv::OpFConvert;
			break;

		case DXIL::ComponentType::I16:
		case DXIL::ComponentType::I32:
			opcode = spv::OpSConvert;
			break;

		case DXIL::ComponentType::U16:
		case DXIL::ComponentType::U32:
			opcode = spv::OpUConvert;
			break;

		default:
			break;
		}

		// OpUConvert is not allowed on integer outputs.
		// We also need SConvert if we're doing 16 -> I32,
		// since what we actually want is I16 -> I32.
		switch (output_type)
		{
		case DXIL::ComponentType::I16:
		case DXIL::ComponentType::I32:
			opcode = spv::OpSConvert;
			break;

		default:
			break;
		}
	}

	Operation *op = allocate(opcode, get_type_id(output_type, 1, components));
	op->add_id(value_id);
	add(op);
	return op->id;
}

void Converter::Impl::fixup_load_type_io(DXIL::ComponentType component_type, unsigned components,
                                         const llvm::Value *value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	bool promote_fp16 = input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations();
	if (!options.storage_16bit_input_output || promote_fp16)
		input_component_type = convert_16bit_component_to_32bit(input_component_type);
	if (promote_fp16)
		output_component_type = convert_16bit_component_to_32bit(output_component_type);

	output_component_type = convert_component_to_unsigned(output_component_type);
	if (output_component_type != input_component_type)
	{
		rewrite_value(value, build_value_cast(get_id_for_value(value),
		                                      input_component_type, output_component_type, components));
	}
}

void Converter::Impl::fixup_load_type_atomic(DXIL::ComponentType component_type, unsigned components,
                                             const llvm::Value *value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	output_component_type = convert_component_to_unsigned(output_component_type);
	if (output_component_type != input_component_type)
	{
		rewrite_value(value, build_value_cast(get_id_for_value(value),
		                                      input_component_type, output_component_type, components));
	}
}

void Converter::Impl::fixup_load_type_typed(DXIL::ComponentType &component_type, unsigned components,
                                            spv::Id &value_id, const llvm::Type *target_type)
{
	auto output_component_type = component_type;
	auto input_component_type = get_effective_typed_resource_type(component_type);

	if (output_component_type == DXIL::ComponentType::U64 && target_type->getIntegerBitWidth() == 32)
	{
		// If the component type is U64 it's used for atomics, but load/store interface is still 32-bit.
		// Bit-cast rather than value cast.
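		// Roughly the following is emitted here (SPIR-V pseudocode, for illustration only):
		//   %x  = OpCompositeExtract %ulong %value 0
		//   %v2 = OpBitcast %v2uint %x
		// and %v2 is then widened with zero constants or narrowed to the requested component count.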
		auto *bitcast_op = allocate(spv::OpCompositeExtract, builder().makeUintType(64));
		bitcast_op->add_id(value_id);
		bitcast_op->add_literal(0);
		add(bitcast_op);

		auto *u32_cast_op = allocate(spv::OpBitcast, builder().makeVectorType(builder().makeUintType(32), 2));
		u32_cast_op->add_id(bitcast_op->id);
		add(u32_cast_op);
		output_component_type = DXIL::ComponentType::U32;

		if (components > 2)
		{
			auto *composite_op = allocate(spv::OpCompositeConstruct,
			                              builder().makeVectorType(builder().makeUintType(32), components));
			composite_op->add_id(u32_cast_op->id);
			for (unsigned i = 2; i < components; i++)
				composite_op->add_id(builder().makeUintConstant(0));
			add(composite_op);
			value_id = composite_op->id;
		}
		else if (components == 1)
		{
			auto *extract_op = allocate(spv::OpCompositeExtract, builder().makeUintType(32));
			extract_op->add_id(u32_cast_op->id);
			extract_op->add_literal(0);
			add(extract_op);
			value_id = extract_op->id;
		}
		else
			value_id = u32_cast_op->id;
	}
	else
	{
		if (output_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
			output_component_type = convert_16bit_component_to_32bit(output_component_type);
		else if (target_type->getTypeID() == llvm::Type::TypeID::FloatTyID)
		{
			// Only convert if we actually want half here.
			// Certain operations always return float even if the resource type is half for some silly reason.
			output_component_type = DXIL::ComponentType::F32;
		}

		output_component_type = convert_component_to_unsigned(output_component_type);
		if (output_component_type != input_component_type)
			value_id = build_value_cast(value_id, input_component_type, output_component_type, components);
		component_type = output_component_type;
	}
}

void Converter::Impl::fixup_load_type_typed(DXIL::ComponentType component_type, unsigned components,
                                            const llvm::Value *value, const llvm::Type *target_type)
{
	spv::Id value_id = get_id_for_value(value);
	spv::Id new_value_id = value_id;
	fixup_load_type_typed(component_type, components, new_value_id, target_type);
	if (new_value_id != value_id)
		rewrite_value(value, new_value_id);
}

spv::Id Converter::Impl::fixup_store_type_io(DXIL::ComponentType component_type, unsigned components, spv::Id value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	if (!options.storage_16bit_input_output ||
	    (output_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations()))
	{
		output_component_type = convert_16bit_component_to_32bit(output_component_type);
	}

	if (input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
		input_component_type = convert_16bit_component_to_32bit(input_component_type);

	input_component_type = convert_component_to_unsigned(input_component_type);
	if (output_component_type != input_component_type)
		value = build_value_cast(value, input_component_type, output_component_type, components);
	return value;
}

spv::Id Converter::Impl::fixup_store_type_atomic(DXIL::ComponentType component_type, unsigned components,
                                                 spv::Id value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	input_component_type = convert_component_to_unsigned(input_component_type);
	if (output_component_type != input_component_type)
		value = build_value_cast(value, input_component_type, output_component_type, components);
	return value;
}

spv::Id Converter::Impl::fixup_store_type_typed(DXIL::ComponentType component_type, unsigned components,
                                                spv::Id value)
{
	if (component_type == DXIL::ComponentType::U64)
	{
		// If the component type is U64 it's used for atomics, but load/store interface is still 32-bit.
		// Bit-cast rather than value cast.
		spv::Id u64_ids[4] = {};
		for (unsigned i = 0; i < components / 2; i++)
		{
			auto *shuffle_op = allocate(spv::OpVectorShuffle,
			                            builder().makeVectorType(builder().makeUintType(32), 2));
			shuffle_op->add_id(value);
			shuffle_op->add_id(value);
			shuffle_op->add_literal(2 * i + 0);
			shuffle_op->add_literal(2 * i + 1);
			add(shuffle_op);

			auto *cast_op = allocate(spv::OpBitcast, builder().makeUintType(64));
			cast_op->add_id(shuffle_op->id);
			add(cast_op);
			u64_ids[i] = cast_op->id;
		}

		for (unsigned i = components / 2; i < components; i++)
			u64_ids[i] = builder().makeUint64Constant(0);

		value = build_vector(builder().makeUintType(64), u64_ids, components);
	}
	else
	{
		auto output_component_type = get_effective_typed_resource_type(component_type);
		auto input_component_type = component_type;

		if (input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
			input_component_type = convert_16bit_component_to_32bit(input_component_type);

		input_component_type = convert_component_to_unsigned(input_component_type);
		if (output_component_type != input_component_type)
			value = build_value_cast(value, input_component_type, output_component_type, components);
	}

	return value;
}

bool Converter::Impl::emit_phi_instruction(CFGNode *block, const llvm::PHINode &instruction)
{
	unsigned count = instruction.getNumIncomingValues();
	spv::Id override_type = 0;

	if (ags_filter_phi(*this, instruction, override_type))
		return true;

	if (count == 1)
	{
		// Degenerate PHI. Seems to happen in some bizarre cases with lcssa passes?
		auto *value = instruction.getIncomingValue(0);
		rewrite_value(&instruction, get_id_for_value(value));

		// This PHI node can actually be pointer or descriptor for whatever reason,
		// so inherit any such mappings.
		{
			auto itr = handle_to_storage_class.find(value);
			if (itr != handle_to_storage_class.end())
				handle_to_storage_class[&instruction] = itr->second;
		}

		{
			auto itr = handle_to_root_member_offset.find(value);
			if (itr != handle_to_root_member_offset.end())
				handle_to_root_member_offset[&instruction] = itr->second;
		}
	}
	else
	{
		PHI phi;
		phi.id = get_id_for_value(&instruction);

		auto itr = llvm_composite_meta.find(&instruction);
		if (itr != llvm_composite_meta.end() && itr->second.components <= 4 &&
		    (itr->second.access_mask & ~0xfu) == 0 &&
		    std::find(llvm_dxil_op_fake_struct_types.begin(), llvm_dxil_op_fake_struct_types.end(),
		              instruction.getType()) != llvm_dxil_op_fake_struct_types.end())
		{
			// Using PHI as a composite is exceedingly quirky, but it does come up.
			// FIXME: This could go wrong if one incoming value uses different components
			// from the others, but this scenario has only ever been observed from single-incoming
			// values, so this code path shouldn't really be taken at all.
			phi.type_id = get_type_id(instruction.getType()->getStructElementType(0));
			if (itr->second.components > 1)
				phi.type_id = builder().makeVectorType(phi.type_id, itr->second.components);
		}
		else
		{
			phi.type_id = override_type ? override_type : get_type_id(instruction.getType());
		}

		phi.relaxed = type_can_relax_precision(instruction.getType(), false);

		for (unsigned i = 0; i < count; i++)
		{
			IncomingValue incoming = {};
			auto bb_itr = bb_map.find(instruction.getIncomingBlock(i));
			// If the block was statically eliminated, it might not exist.
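			// E.g. a PHI with incoming edges { dead_bb, live_bb } quietly drops the dead edge
			// here; if only one edge remains, it is rewritten as a plain value further down.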
			if (bb_itr != bb_map.end())
			{
				incoming.block = bb_itr->second->node;
				auto *value = instruction.getIncomingValue(i);
				incoming.id = get_id_for_value(value);
				phi.incoming.push_back(incoming);
			}
		}

		if (phi.incoming.empty())
		{
			LOGE("PHI instruction has zero incoming blocks.\n");
			return false;
		}

		if (phi.incoming.size() > 1)
			block->ir.phi.push_back(std::move(phi));
		else
			rewrite_value(&instruction, phi.incoming.front().id);
	}

	return true;
}

static bool instruction_has_side_effects(const llvm::Instruction &instruction)
{
	if (llvm::isa<llvm::StoreInst>(&instruction) ||
	    llvm::isa<llvm::AtomicRMWInst>(&instruction) ||
	    llvm::isa<llvm::AtomicCmpXchgInst>(&instruction))
	{
		return true;
	}

	if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&instruction))
	{
		auto *called_function = call_inst->getCalledFunction();
		if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
			return dxil_instruction_has_side_effects(call_inst);
		else
			return true;
	}

	return false;
}

bool Converter::Impl::emit_instruction(CFGNode *block, const llvm::Instruction &instruction)
{
	if (instruction.isTerminator())
		return true;

	// We really shouldn't have to do this, but DXC misses some dead SSA ops.
	// Helps sanitize repro suite output in some cases.
	if (options.eliminate_dead_code && !instruction_has_side_effects(instruction) &&
	    llvm_used_ssa_values.count(&instruction) == 0)
	{
		return true;
	}

	current_block = &block->ir.operations;

	if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&instruction))
	{
		auto *called_function = call_inst->getCalledFunction();
		if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
		{
			return emit_dxil_instruction(*this, call_inst);
		}
		else if (strncmp(called_function->getName().data(), "llvm.", 5) == 0)
		{
			// lib_6_6 sometimes emits llvm.lifetime.begin/end for some bizarre reason.
			// Just ignore ...
			return true;
		}
		else
		{
			return emit_call_instruction(*this, *call_inst);
		}
	}
	else if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(&instruction))
		return emit_phi_instruction(block, *phi_inst);
	else
		return emit_llvm_instruction(*this, instruction);

	current_block = nullptr;
	return false;
}

bool Converter::Impl::emit_execution_modes_node_output(llvm::MDNode *output)
{
	NodeOutputMeta output_meta = {};
	bool is_rw_sharing;
	output_meta.payload_stride = node_parse_payload_stride(output, is_rw_sharing);
	output_meta.spec_constant_node_index = builder().makeUintConstant(0, true);
	builder().addDecoration(output_meta.spec_constant_node_index, spv::DecorationSpecId,
	                        int(NodeSpecIdOutputBase + node_outputs.size()));

	uint32_t num_ops = output->getNumOperands();
	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(output, i));
		if (tag == DXIL::NodeMetadataTag::NodeOutputID)
		{
			auto *output_node = llvm::cast<llvm::MDNode>(output->getOperand(i + 1));
			String name = get_string_metadata(output_node, 0);
			builder().addName(output_meta.spec_constant_node_index, name.c_str());

			// FIXME: This is probably not accurate for arrayed nodes.
			// Can recursive nodes be arrayed? Seems very spicy ...
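			// I.e. an output is treated as recursive only when both the node ID string and the
			// array index match this node's own input, e.g. "Foo"[2] enqueueing "Foo"[2].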
			output_meta.is_recursive = name == node_input.node_id &&
			                           node_input.node_array_index == get_constant_metadata(output_node, 1);
		}
	}

	node_outputs.push_back(output_meta);
	return true;
}

NodeDispatchGrid Converter::Impl::node_parse_dispatch_grid(llvm::MDNode *node_meta)
{
	uint32_t num_ops = node_meta->getNumOperands();
	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(node_meta, i));
		if (tag == DXIL::NodeMetadataTag::NodeRecordType)
		{
			auto *node_record_type = llvm::cast<llvm::MDNode>(node_meta->getOperand(i + 1));
			for (uint32_t j = 0; j < node_record_type->getNumOperands(); j += 2)
			{
				if (get_constant_metadata(node_record_type, j) == 1)
				{
					auto *dispatch_info = llvm::cast<llvm::MDNode>(node_record_type->getOperand(j + 1));
					uint32_t byte_offset = get_constant_metadata(dispatch_info, 0);
					auto component_type = DXIL::ComponentType(get_constant_metadata(dispatch_info, 1));
					uint32_t num_components = get_constant_metadata(dispatch_info, 2);
					return { byte_offset, component_type, num_components };
				}
			}
		}
	}

	return {};
}

uint32_t Converter::Impl::node_parse_payload_stride(llvm::MDNode *node_meta, bool &is_rw_sharing)
{
	uint32_t num_ops = node_meta->getNumOperands();
	uint32_t payload_stride = 0;
	is_rw_sharing = false;

	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(node_meta, i));
		if (tag == DXIL::NodeMetadataTag::NodeIOFlags)
		{
			uint32_t node_io_flags = get_constant_metadata(node_meta, i + 1);
			if ((node_io_flags & DXIL::NodeIOEmptyRecordBit) != 0)
				return 0;
			if ((node_io_flags & DXIL::NodeIOTrackRWInputSharingBit) != 0)
				is_rw_sharing = true;
		}
		else if (tag == DXIL::NodeMetadataTag::NodeRecordType)
		{
			auto *node_record_type = llvm::cast<llvm::MDNode>(node_meta->getOperand(i + 1));
			for (uint32_t j = 0; j < node_record_type->getNumOperands(); j += 2)
			{
				if (get_constant_metadata(node_record_type, j) == 0)
				{
					uint32_t input_node_size = get_constant_metadata(node_record_type, j + 1);
					payload_stride = input_node_size;
				}
			}
		}
	}

	if (is_rw_sharing)
	{
		// DXIL metadata does not account for the implied u32 used for group sharing.
		// In case the last member is u16, align to u32.
		payload_stride = (payload_stride + 3u) & ~3u;
		// Allocate space for magic word.
		payload_stride += 4;
	}

	return payload_stride;
}

bool Converter::Impl::emit_execution_modes_node_input()
{
	spv::Id u32_type_id = builder().makeUintType(32);
	spv::Id uvec2_type_id = builder().makeVectorType(u32_type_id, 2);
	spv::Id u64_type_id = builder().makeUintType(64);

	if (node_input.payload_stride)
	{
		node_input.private_bda_var_id = create_variable(
		    spv::StorageClassPrivate, u64_type_id, "NodeInputPayloadBDA");
		node_input.private_stride_var_id = create_variable(
		    spv::StorageClassPrivate, u32_type_id, "NodeInputStride");
	}

	// We have to rewrite global IDs. Local invocation should remain intact.
	spv::Id uvec3_type = builder().makeVectorType(u32_type_id, 3);
	spv::Id workgroup_id = create_variable(spv::StorageClassPrivate, uvec3_type, "WorkgroupID");
	spv::Id global_invocation_id = create_variable(spv::StorageClassPrivate, uvec3_type, "GlobalInvocationID");
	spirv_module.register_builtin_shader_input(workgroup_id, spv::BuiltInWorkgroupId);
	spirv_module.register_builtin_shader_input(global_invocation_id, spv::BuiltInGlobalInvocationId);

	// Emit binding model.
	// Push constants are our only option.
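	// The NodeDispatchRegisters block declared below occupies the push constant slot itself,
	// so root parameters presumably have to be routed through the inline UBO path instead.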
	if (!options.inline_ubo_enable)
	{
		LOGE("When compiling for nodes, inline UBO path must be enabled for root parameters.\n");
		return false;
	}

	node_input.shader_record_block_type_id = emit_shader_record_buffer_block_type(true);
	spv::Id ptr_shader_record_block_type_id = 0;
	if (node_input.shader_record_block_type_id)
	{
		ptr_shader_record_block_type_id =
		    builder().makePointer(spv::StorageClassPhysicalStorageBuffer, node_input.shader_record_block_type_id);
	}
	else
	{
		// Dummy type
		ptr_shader_record_block_type_id = builder().makeVectorType(builder().makeUintType(32), 2);
	}

	// Declare the ABI for dispatching a node. This will change depending on the dispatch mode,
	// and style of execution (indirect pull or array).
	spv::Id u32_array_type_id = builder().makeRuntimeArray(u32_type_id);
	builder().addDecoration(u32_array_type_id, spv::DecorationArrayStride, 4);

	spv::Id u32_struct_type_id = builder().makeStructType({ u32_type_id }, "NodeReadonlyU32Ptr");
	builder().addDecoration(u32_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u32_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u32_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u32_struct_type_id, 0, "value");
	spv::Id u32_ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u32_struct_type_id);

	spv::Id u32_array_struct_type_id = builder().makeStructType({ u32_array_type_id }, "NodeReadonlyU32ArrayPtr");
	builder().addDecoration(u32_array_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u32_array_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u32_array_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u32_array_struct_type_id, 0, "offsets");
	spv::Id u32_array_ptr_type_id =
	    builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u32_array_struct_type_id);

	const Vector<spv::Id> members = {
		u64_type_id,
		u32_ptr_type_id,
		u32_ptr_type_id,
		uvec2_type_id,
		u64_type_id,
		u64_type_id,
		ptr_shader_record_block_type_id,
		u32_type_id,
		u32_type_id,
	};

	spv::Id type_id = builder().makeStructType(members, "NodeDispatchRegisters");
	builder().addMemberDecoration(type_id, NodePayloadBDA, spv::DecorationOffset, 0);
	builder().addMemberDecoration(type_id, NodeLinearOffsetBDA, spv::DecorationOffset, 8);
	builder().addMemberDecoration(type_id, NodeEndNodesBDA, spv::DecorationOffset, 16);
	builder().addMemberDecoration(type_id, NodePayloadStrideOrOffsetsBDA, spv::DecorationOffset, 24);
	builder().addMemberDecoration(type_id, NodePayloadOutputBDA, spv::DecorationOffset, 32);
	builder().addMemberDecoration(type_id, NodePayloadOutputAtomicBDA, spv::DecorationOffset, 40);
	builder().addMemberDecoration(type_id, NodeLocalRootSignatureBDA, spv::DecorationOffset, 48);
	builder().addMemberDecoration(type_id, NodePayloadOutputOffset, spv::DecorationOffset, 56);
	builder().addMemberDecoration(type_id, NodeRemainingRecursionLevels, spv::DecorationOffset, 60);

	// For linear node layout (entry point).
	// Node payload is found at PayloadLinearBDA + NodeIndex * PayloadStride.
	builder().addMemberName(type_id, NodePayloadBDA, "PayloadLinearBDA");
	// With packed workgroup layout, need to apply an offset.
	builder().addMemberName(type_id, NodeLinearOffsetBDA, "NodeLinearOffsetBDA");
	// For thread and coalesce, need to know total number of threads to mask execution on edge.
	builder().addMemberName(type_id, NodeEndNodesBDA, "NodeEndNodesBDA");
	builder().addMemberName(type_id, NodePayloadStrideOrOffsetsBDA, "NodePayloadStrideOrOffsetsBDA");
	builder().addMemberName(type_id, NodePayloadOutputBDA, "NodePayloadOutputBDA");
	builder().addMemberName(type_id, NodePayloadOutputAtomicBDA, "NodePayloadOutputAtomicBDA");
	builder().addMemberName(type_id, NodeLocalRootSignatureBDA, "NodeLocalRootSignatureBDA");
	// For broadcast nodes. Need to instance multiple times.
	// Becomes WorkGroupID and affects GlobalInvocationID.
	builder().addMemberName(type_id, NodePayloadOutputOffset, "NodePayloadOutputOffset");
	builder().addMemberName(type_id, NodeRemainingRecursionLevels, "NodeRemainingRecursionLevels");
	builder().addDecoration(type_id, spv::DecorationBlock);

	node_input.node_dispatch_push_id = create_variable(spv::StorageClassPushConstant, type_id, "NodeDispatch");
	builder().addDecoration(node_input.node_dispatch_push_id, spv::DecorationRestrictPointer);

	node_input.private_coalesce_offset_id = create_variable(spv::StorageClassPrivate, u32_type_id, "NodeCoalesceOffset");
	node_input.private_coalesce_count_id = create_variable(spv::StorageClassPrivate, u32_type_id, "NodeCoalesceCount");
	node_input.u32_ptr_type_id = u32_ptr_type_id;
	node_input.u32_array_ptr_type_id = u32_array_ptr_type_id;

	spv::Id u64_struct_type_id = builder().makeStructType({ u64_type_id }, "NodeReadonlyU64Ptr");
	builder().addDecoration(u64_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u64_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u64_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u64_struct_type_id, 0, "value");
	node_input.u64_ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u64_struct_type_id);

	return true;
}

NodeOutputData Converter::Impl::get_node_output(llvm::MDNode *output)
{
	NodeOutputData data = {};
	uint32_t num_ops = output->getNumOperands();

	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(output, i));
		if (tag == DXIL::NodeMetadataTag::NodeOutputID)
		{
			auto *output_node = llvm::cast<llvm::MDNode>(output->getOperand(i + 1));
			data.node_id = get_string_metadata(output_node, 0);
			data.node_array_index = get_constant_metadata(output_node, 1);
		}
		else if (tag == DXIL::NodeMetadataTag::NodeAllowSparseNodes)
			data.sparse_array = get_constant_metadata(output, i + 1) != 0;
		else if (tag == DXIL::NodeMetadataTag::NodeOutputArraySize)
			data.node_array_size = get_constant_metadata(output, i + 1);
		else if (tag == DXIL::NodeMetadataTag::NodeMaxRecords)
			data.max_records = get_constant_metadata(output, i + 1);
	}

	return data;
}

NodeInputData Converter::Impl::get_node_input(llvm::MDNode *meta)
{
	NodeInputData node = {};

	auto *launch_type_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeLaunchType);
	if (!launch_type_node)
		return {};

	node.launch_type = DXIL::NodeLaunchType(
	    llvm::cast<llvm::ConstantAsMetadata>(*launch_type_node)->getValue()->getUniqueInteger().getZExtValue());
	if (node.launch_type == DXIL::NodeLaunchType::Invalid)
		return {};

	auto *is_program_entry_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeIsProgramEntry);
	if (is_program_entry_node)
	{
		node.is_program_entry =
		    llvm::cast<llvm::ConstantAsMetadata>(*is_program_entry_node)->getValue()->getUniqueInteger().getZExtValue() != 0;
	}

	node.is_indirect_bda_stride_program_entry_spec_id = NodeSpecIdIndirectPayloadStride;
	node.is_entry_point_spec_id = NodeSpecIdIsEntryPoint;

	if (node.launch_type == DXIL::NodeLaunchType::Broadcasting)
	{
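		// For broadcasting launch, the dispatch grid may be specialized per pipeline;
		// the spec constant IDs assigned below let the runtime patch the upper-bound grid
		// without recompiling the shader.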
		node.dispatch_grid_is_upper_bound_spec_id = NodeSpecIdDispatchGridIsUpperBound;
		node.is_static_broadcast_node_spec_id = NodeSpecIdIsStaticBroadcastNode;
		node.max_broadcast_grid_spec_id[0] = NodeSpecIdMaxBroadcastGridX;
		node.max_broadcast_grid_spec_id[1] = NodeSpecIdMaxBroadcastGridY;
		node.max_broadcast_grid_spec_id[2] = NodeSpecIdMaxBroadcastGridZ;
	}
	else
	{
		node.dispatch_grid_is_upper_bound_spec_id = UINT32_MAX;
		node.is_static_broadcast_node_spec_id = UINT32_MAX;
		for (auto &spec_id : node.max_broadcast_grid_spec_id)
			spec_id = UINT32_MAX;
	}

	auto *recursion_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeMaxRecursionDepth);
	if (recursion_node)
	{
		node.recursion_factor =
		    llvm::cast<llvm::ConstantAsMetadata>(*recursion_node)->getValue()->getUniqueInteger().getZExtValue();
	}

	if (node.launch_type == DXIL::NodeLaunchType::Broadcasting)
	{
		auto *max_grid = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeMaxDispatchGrid);
		const llvm::MDOperand *fixed_grid;
		if (max_grid)
		{
			node.dispatch_grid_is_upper_bound = true;
			fixed_grid = max_grid;
		}
		else
			fixed_grid = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeDispatchGrid);

		if (!fixed_grid)
			return {};

		for (uint32_t i = 0; i < 3; i++)
			node.broadcast_grid[i] = get_constant_metadata(llvm::cast<llvm::MDNode>(*fixed_grid), i);
	}

	node.thread_group_size_spec_id[0] = NodeSpecIdGroupSizeX;
	node.thread_group_size_spec_id[1] = NodeSpecIdGroupSizeY;
	node.thread_group_size_spec_id[2] = NodeSpecIdGroupSizeZ;

	auto *name_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeID);
	if (name_node)
	{
		auto *name_id = llvm::cast<llvm::MDNode>(*name_node);
		node.node_id = get_string_metadata(name_id, 0);
		node.node_array_index = get_constant_metadata(name_id, 1);
	}

	auto *inputs_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeInputs);
	llvm::MDNode *input = nullptr;
	if (inputs_node)
	{
		auto *inputs = llvm::cast<llvm::MDNode>(*inputs_node);
		// Current spec only allows one input node.
		if (inputs->getNumOperands() != 1)
			return {};
		input = llvm::cast<llvm::MDNode>(inputs->getOperand(0));
	}

	if (input)
	{
		uint32_t num_ops = input->getNumOperands();
		node.grid_buffer = node_parse_dispatch_grid(input);
		node.payload_stride = node_parse_payload_stride(input, node.node_track_rw_input_sharing);

		for (uint32_t i = 0; i < num_ops; i += 2)
		{
			auto tag = DXIL::NodeMetadataTag(get_constant_metadata(input, i));
			if (tag == DXIL::NodeMetadataTag::NodeMaxRecords)
				node.coalesce_factor = get_constant_metadata(input, i + 1);
		}

		// We seem to need a sensible default.
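		// E.g. a coalescing node whose input lacks a MaxRecords declaration would otherwise
		// end up with coalesce_factor == 0, so default to one record below.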
		if (node.coalesce_factor == 0 && node.launch_type == DXIL::NodeLaunchType::Coalescing)
			node.coalesce_factor = 1;
	}

	auto *share_input_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeShareInputOf);
	if (share_input_node)
	{
		auto *share_input = llvm::cast<llvm::MDNode>(*share_input_node);
		node.node_share_input_id = get_string_metadata(share_input, 0);
		node.node_share_input_array_index = get_constant_metadata(share_input, 1);
	}

	auto *local_argument_node =
	    get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeLocalRootArgumentsTableIndex);
	if (local_argument_node)
	{
		node.local_root_arguments_table_index =
		    llvm::cast<llvm::ConstantAsMetadata>(*local_argument_node)->getValue()->getUniqueInteger().getZExtValue();
	}
	else
		node.local_root_arguments_table_index = UINT32_MAX;

	return node;
}

NodeInputData Converter::get_node_input(const LLVMBCParser &parser, const char *entry)
{
	auto *entry_point_meta = get_entry_point_meta(parser.get_module(), entry);
	if (!entry_point_meta)
		return {};
	return Impl::get_node_input(entry_point_meta);
}

Vector<NodeOutputData> Converter::get_node_outputs(const LLVMBCParser &parser, const char *entry)
{
	Vector<NodeOutputData> output_data;
	auto *entry_point_meta = get_entry_point_meta(parser.get_module(), entry);
	if (!entry_point_meta)
		return {};

	auto *outputs_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NodeOutputs);
	if (outputs_node)
	{
		auto *outputs = llvm::cast<llvm::MDNode>(*outputs_node);
		for (unsigned i = 0; i < outputs->getNumOperands(); i++)
		{
			auto *output = llvm::cast<llvm::MDNode>(outputs->getOperand(i));
			output_data.push_back(Impl::get_node_output(output));
		}
	}

	// Spec constant IDs are allocated incrementally.
	// Spec constant ID 0 is reserved for workgroup size spec constant.
	uint32_t spec_constant_id = NodeSpecIdOutputBase;
	for (auto &output : output_data)
	{
		output.node_index_spec_constant_id = spec_constant_id;
		spec_constant_id++;
	}

	return output_data;
}

String Converter::get_analysis_warnings() const
{
	String str;
	if (impl->shader_analysis.needs_auto_group_shared_barriers)
	{
		// This is a case that might just happen to work if the game assumes lock-step execution.
		// If the group size is larger, it's extremely unlikely the game works by chance on native drivers.
		// Some shaders seem to use groupshared as a sort of "scratch space" per thread, which
		// is a valid use case and does not require barriers to be correct.
		str += "- Has group shared access, but no group shared barrier anywhere.\n";
	}
	return str;
}

bool Converter::Impl::emit_execution_modes_node()
{
	// It will be necessary to override all this metadata through some API.
	// Not really needed to support this until we've implemented everything.
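	// The overall flow below: parse NodeInputData from DXIL metadata, emit the node input ABI
	// (push constants, payload pointers, spec constants), then one spec constant per declared
	// output, and finally fall through to the plain compute path for thread group setup.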
NodeInputData node = get_node_input(entry_point_meta); if (node.launch_type == DXIL::NodeLaunchType::Invalid) return false; node_input.node_id = node.node_id; node_input.node_array_index = node.node_array_index; node_input.launch_type = node.launch_type; node_input.dispatch_grid = node.grid_buffer; node_input.payload_stride = node.payload_stride; node_input.coalesce_stride = node.coalesce_factor; if (!emit_execution_modes_node_input()) return false; auto *outputs_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NodeOutputs); if (outputs_node) { auto *outputs = llvm::cast(*outputs_node); for (unsigned i = 0; i < outputs->getNumOperands(); i++) { auto *output = llvm::cast(outputs->getOperand(i)); if (!emit_execution_modes_node_output(output)) return false; } } node_input.is_indirect_payload_stride_id = builder().makeBoolConstant(false, true); builder().addDecoration(node_input.is_indirect_payload_stride_id, spv::DecorationSpecId, int(node.is_indirect_bda_stride_program_entry_spec_id)); builder().addName(node_input.is_indirect_payload_stride_id, "NodeEntryIndirectPayloadStride"); node_input.is_entry_point_id = builder().makeBoolConstant(node.is_program_entry, true); builder().addDecoration(node_input.is_entry_point_id, spv::DecorationSpecId, int(node.is_entry_point_spec_id)); builder().addName(node_input.is_entry_point_id, "NodeIsProgramEntry"); if (node_input.launch_type == DXIL::NodeLaunchType::Broadcasting) { node_input.broadcast_has_max_grid_id = builder().makeBoolConstant(node.dispatch_grid_is_upper_bound, true); builder().addDecoration(node_input.broadcast_has_max_grid_id, spv::DecorationSpecId, int(node.dispatch_grid_is_upper_bound_spec_id)); builder().addName(node_input.broadcast_has_max_grid_id, "DispatchGridIsUpperBound"); node_input.is_static_broadcast_node_id = builder().makeBoolConstant(false, true); builder().addDecoration(node_input.is_static_broadcast_node_id, spv::DecorationSpecId, int(node.is_static_broadcast_node_spec_id)); builder().addName(node_input.is_static_broadcast_node_id, "DispatchStaticPayload"); spv::Id u32_type = builder().makeUintType(32); for (uint32_t i = 0; i < 3; i++) { node_input.max_broadcast_grid_id[i] = builder().makeUintConstant(node.broadcast_grid[i], true); builder().addDecoration(node_input.max_broadcast_grid_id[i], spv::DecorationSpecId, int(node.max_broadcast_grid_spec_id[i])); static const char *names[] = { "MaxBroadcastGridX", "MaxBroadcastGridY", "MaxBroadcastGridZ" }; builder().addName(node_input.max_broadcast_grid_id[i], names[i]); node_input.max_broadcast_grid_minus_1_id[i] = builder().createSpecConstantOp( spv::OpISub, u32_type, { node_input.max_broadcast_grid_id[i], builder().makeUintConstant(1) }, {}); static const char *sub_names[] = { "GridXMinus1", "GridYMinus1", "GridZMinus1" }; builder().addName(node_input.max_broadcast_grid_minus_1_id[i], sub_names[i]); } } return emit_execution_modes_compute(); } bool Converter::Impl::emit_execution_modes_compute() { auto *num_threads_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NumThreads); if (num_threads_node) { auto *num_threads = llvm::cast(*num_threads_node); return emit_execution_modes_thread_wave_properties(num_threads); } else return false; } static bool entry_point_modifies_sample_mask(const llvm::MDNode *node) { if (!node->getOperand(2)) return false; auto &signature = node->getOperand(2); auto *signature_node = llvm::cast(signature); auto &outputs = signature_node->getOperand(1); if (!outputs) return false; auto *outputs_node = 
    for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
    {
        auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
        auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(output, 3));
        if (system_value == DXIL::Semantic::Depth || system_value == DXIL::Semantic::DepthLessEqual ||
            system_value == DXIL::Semantic::DepthGreaterEqual || system_value == DXIL::Semantic::StencilRef ||
            system_value == DXIL::Semantic::Coverage)
        {
            return true;
        }
    }

    return false;
}

static uint64_t get_shader_flags(const llvm::MDNode *entry_point_meta)
{
    auto *flags_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ShaderFlags);
    if (flags_node)
        return llvm::cast<llvm::ConstantAsMetadata>(*flags_node)->getValue()->getUniqueInteger().getZExtValue();
    else
        return 0;
}

bool Converter::Impl::emit_execution_modes_pixel_late()
{
    auto &builder = spirv_module.get_builder();

    if (execution_mode_meta.declares_rov)
    {
        builder.addExtension("SPV_EXT_fragment_shader_interlock");
        if (execution_mode_meta.per_sample_shading)
        {
            builder.addCapability(spv::CapabilityFragmentShaderSampleInterlockEXT);
            builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSampleInterlockOrderedEXT);
        }
        else
        {
            builder.addCapability(spv::CapabilityFragmentShaderPixelInterlockEXT);
            builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModePixelInterlockOrderedEXT);
        }
    }

    return true;
}

bool Converter::Impl::emit_execution_modes_pixel()
{
    auto &builder = spirv_module.get_builder();
    auto flags = get_shader_flags(entry_point_meta);
    bool early_depth_stencil = (flags & DXIL::ShaderFlagEarlyDepthStencil) != 0;

    if (options.descriptor_qa_enabled || options.instruction_instrumentation.enabled)
    {
        // If we have descriptor QA enabled, we will have side effects when running fragment shaders.
        // This forces late-Z, which can trigger some horrible performance issues.
        // Make sure to enable early depth-stencil if nothing in the shader is early/late sensitive.
        if (!entry_point_modifies_sample_mask(entry_point_meta) &&
            !shader_analysis.has_side_effects && !shader_analysis.discards)
        {
            early_depth_stencil = true;
        }
    }

    if (early_depth_stencil)
        builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeEarlyFragmentTests);

    // Avoid masking helper lanes when strict_helper_lane_waveops is used.
    // Execution modes to enable correct Vulkan behaviour are set up later.
    auto *func = get_entry_point_function(entry_point_meta);
    execution_mode_meta.waveops_include_helper_lanes = func->hasFnAttribute("waveops-include-helper-lanes");

    // If helper lanes don't exist, don't bother trying to mask them out,
    // it will just confuse the compiler.
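    // Net effect of the expression below: helper lanes participate in wave ops unless this is
    // a fragment shader, strict_helper_lane_waveops is enabled, and the shader did not opt in
    // through the waveops-include-helper-lanes function attribute
    // (the SM 6.7 [WaveOpsIncludeHelperLanes] entry point attribute).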
    spirv_module.set_helper_lanes_participate_in_wave_ops(!options.strict_helper_lane_waveops ||
                                                          execution_model != spv::ExecutionModelFragment ||
                                                          execution_mode_meta.waveops_include_helper_lanes);
    return true;
}

bool Converter::Impl::emit_execution_modes_domain()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityTessellation);
    auto *ds_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::DSState);
    if (ds_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*ds_state_node);
        auto domain = static_cast<DXIL::TessellatorDomain>(get_constant_metadata(arguments, 0));
        auto *func = spirv_module.get_entry_function();

        switch (domain)
        {
        case DXIL::TessellatorDomain::IsoLine:
            builder.addExecutionMode(func, spv::ExecutionModeIsolines);
            break;

        case DXIL::TessellatorDomain::Tri:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            break;

        case DXIL::TessellatorDomain::Quad:
            builder.addExecutionMode(func, spv::ExecutionModeQuads);
            break;

        default:
            LOGE("Unknown tessellator domain!\n");
            return false;
        }

        unsigned input_control_points = get_constant_metadata(arguments, 1);
        execution_mode_meta.stage_input_num_vertex = input_control_points;
        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_hull()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityTessellation);
    auto *hs_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::HSState);
    if (hs_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*hs_state_node);

        auto *patch_constant = llvm::cast<llvm::ValueAsMetadata>(arguments->getOperand(0));
        auto *patch_constant_value = patch_constant->getValue();
        execution_mode_meta.patch_constant_function = llvm::cast<llvm::Function>(patch_constant_value);

        unsigned input_control_points = get_constant_metadata(arguments, 1);
        unsigned output_control_points = get_constant_metadata(arguments, 2);
        auto domain = static_cast<DXIL::TessellatorDomain>(get_constant_metadata(arguments, 3));
        auto partitioning = static_cast<DXIL::TessellatorPartitioning>(get_constant_metadata(arguments, 4));
        auto primitive = static_cast<DXIL::TessellatorOutputPrimitive>(get_constant_metadata(arguments, 5));
        auto *func = spirv_module.get_entry_function();

        switch (domain)
        {
        case DXIL::TessellatorDomain::IsoLine:
            builder.addExecutionMode(func, spv::ExecutionModeIsolines);
            break;

        case DXIL::TessellatorDomain::Tri:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            break;

        case DXIL::TessellatorDomain::Quad:
            builder.addExecutionMode(func, spv::ExecutionModeQuads);
            break;

        default:
            LOGE("Unknown tessellator domain!\n");
            return false;
        }

        switch (partitioning)
        {
        case DXIL::TessellatorPartitioning::Integer:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingEqual);
            break;

        case DXIL::TessellatorPartitioning::Pow2:
            LOGE("Emulating Pow2 spacing as Integer.\n");
            builder.addExecutionMode(func, spv::ExecutionModeSpacingEqual);
            break;

        case DXIL::TessellatorPartitioning::FractionalEven:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingFractionalEven);
            break;

        case DXIL::TessellatorPartitioning::FractionalOdd:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingFractionalOdd);
            break;

        default:
            LOGE("Unknown tessellator partitioning.\n");
            return false;
        }

        switch (primitive)
        {
        case DXIL::TessellatorOutputPrimitive::TriangleCCW:
            builder.addExecutionMode(func, spv::ExecutionModeVertexOrderCcw);
            break;

        case DXIL::TessellatorOutputPrimitive::TriangleCW:
            builder.addExecutionMode(func, spv::ExecutionModeVertexOrderCw);
            break;

        case DXIL::TessellatorOutputPrimitive::Point:
            builder.addExecutionMode(func, spv::ExecutionModePointMode);
            // TODO: Do we have to specify CCW/CW in point mode?
            break;

        case DXIL::TessellatorOutputPrimitive::Line:
            break;

        default:
            LOGE("Unknown tessellator primitive.\n");
            return false;
        }

        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, output_control_points);
        execution_mode_meta.stage_input_num_vertex = input_control_points;
        execution_mode_meta.stage_output_num_vertex = output_control_points;
        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_geometry()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityGeometry);
    auto *gs_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::GSState);
    if (gs_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*gs_state_node);

        auto input_primitive = static_cast<DXIL::InputPrimitive>(get_constant_metadata(arguments, 0));
        unsigned max_vertex_count = get_constant_metadata(arguments, 1);
        auto *func = spirv_module.get_entry_function();
        auto topology = static_cast<DXIL::PrimitiveTopology>(get_constant_metadata(arguments, 3));
        unsigned gs_instances = get_constant_metadata(arguments, 4);
        execution_mode_meta.gs_stream_active_mask = get_constant_metadata(arguments, 2);

        builder.addExecutionMode(func, spv::ExecutionModeInvocations, gs_instances);
        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, max_vertex_count);

        switch (input_primitive)
        {
        case DXIL::InputPrimitive::Point:
            builder.addExecutionMode(func, spv::ExecutionModeInputPoints);
            execution_mode_meta.stage_input_num_vertex = 1;
            break;

        case DXIL::InputPrimitive::Line:
            builder.addExecutionMode(func, spv::ExecutionModeInputLines);
            execution_mode_meta.stage_input_num_vertex = 2;
            break;

        case DXIL::InputPrimitive::LineWithAdjacency:
            builder.addExecutionMode(func, spv::ExecutionModeInputLinesAdjacency);
            execution_mode_meta.stage_input_num_vertex = 4;
            break;

        case DXIL::InputPrimitive::Triangle:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            execution_mode_meta.stage_input_num_vertex = 3;
            break;

        case DXIL::InputPrimitive::TriangleWithAdjaceny:
            builder.addExecutionMode(func, spv::ExecutionModeInputTrianglesAdjacency);
            execution_mode_meta.stage_input_num_vertex = 6;
            break;

        default:
            LOGE("Unexpected input primitive (%u).\n", unsigned(input_primitive));
            return false;
        }

        switch (topology)
        {
        case DXIL::PrimitiveTopology::PointList:
            builder.addExecutionMode(func, spv::ExecutionModeOutputPoints);
            break;

        case DXIL::PrimitiveTopology::LineStrip:
            builder.addExecutionMode(func, spv::ExecutionModeOutputLineStrip);
            break;

        case DXIL::PrimitiveTopology::TriangleStrip:
            builder.addExecutionMode(func, spv::ExecutionModeOutputTriangleStrip);
            break;

        default:
            LOGE("Unexpected output primitive topology (%u).\n", unsigned(topology));
            return false;
        }

        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_ray_tracing(spv::ExecutionModel model)
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityRayTracingKHR);

    if (options.ray_tracing_primitive_culling_enabled && shader_analysis.can_require_primitive_culling)
        builder.addCapability(spv::CapabilityRayTraversalPrimitiveCullingKHR);

    if (options.opacity_micromap_enabled && shader_analysis.can_require_opacity_micromap)
    {
        builder.addCapability(spv::CapabilityRayTracingOpacityMicromapEXT);
        builder.addExtension("SPV_EXT_opacity_micromap");
    }

    builder.addExtension("SPV_KHR_ray_tracing");
    builder.addExtension("SPV_EXT_descriptor_indexing");

    // For DXR, we'll need full bindless.
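    // Descriptive note: the capability block below corresponds to the Vulkan descriptor indexing
    // feature bits (runtimeDescriptorArray plus dynamic and non-uniform indexing of sampled images,
    // storage images, and storage/uniform buffers), which a D3D12-on-Vulkan runtime has to require
    // anyway before it can expose DXR.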
    builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
    builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
    builder.addCapability(spv::CapabilitySampledImageArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityStorageImageArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityStorageImageArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityStorageBufferArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityUniformBufferArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityUniformBufferArrayNonUniformIndexing);
    return true;
}

bool Converter::Impl::emit_execution_modes_thread_wave_properties(const llvm::MDNode *num_threads)
{
    auto &builder = spirv_module.get_builder();

    if (options.force_wave_size_enable && options.force_subgroup_size)
    {
        execution_mode_meta.wave_size_min = options.force_subgroup_size;
        execution_mode_meta.wave_size_max = 0;
        execution_mode_meta.wave_size_preferred = 0;
    }
    else
    {
        auto *wave_size_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::WaveSize);
        auto *wave_size_range_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::RangedWaveSize);

        if (wave_size_range_node)
        {
            auto *wave_size = llvm::cast<llvm::MDNode>(*wave_size_range_node);
            execution_mode_meta.wave_size_min = get_constant_metadata(wave_size, 0);
            execution_mode_meta.wave_size_max = get_constant_metadata(wave_size, 1);
            execution_mode_meta.wave_size_preferred = get_constant_metadata(wave_size, 2);
        }
        else if (wave_size_node)
        {
            auto *wave_size = llvm::cast<llvm::MDNode>(*wave_size_node);
            execution_mode_meta.wave_size_min = get_constant_metadata(wave_size, 0);
            execution_mode_meta.wave_size_max = 0;
            execution_mode_meta.wave_size_preferred = 0;
        }
    }

    unsigned threads[3];
    for (unsigned dim = 0; dim < 3; dim++)
        threads[dim] = get_constant_metadata(num_threads, dim);
    unsigned total_workgroup_threads = threads[0] * threads[1] * threads[2];

    if (execution_model == spv::ExecutionModelGLCompute)
    {
        if ((total_workgroup_threads <= 32 && shader_analysis.require_subgroups) ||
            (shader_analysis.subgroup_ballot_reads_first && !shader_analysis.subgroup_ballot_reads_upper))
        {
            // Common game bug. Only reading the first scalar of a ballot probably means
            // the shader relies on WaveSize <= 32.
            suggest_maximum_wave_size(32);
        }
    }

    if (shader_analysis.require_compute_shader_derivatives)
    {
        if (execution_model != spv::ExecutionModelGLCompute && execution_model != spv::ExecutionModelTaskEXT &&
            execution_model != spv::ExecutionModelMeshEXT)
        {
            LOGE("Derivatives only supported in compute, task and mesh shaders.\n");
            return false;
        }

        // For sanity, verify that the dimensions align sufficiently.
        // The spec says the product of the workgroup sizes must be a multiple of 4.
        if (total_workgroup_threads % 4 == 0)
        {
            bool derivatives_2d = (threads[0] % 2 == 0) && (threads[1] % 2 == 0);

            if (options.compute_shader_derivatives)
            {
                builder.addExtension(options.compute_shader_derivatives_khr ?
                                     "SPV_KHR_compute_shader_derivatives" :
                                     "SPV_NV_compute_shader_derivatives");

                if (derivatives_2d && options.compute_shader_derivatives_quad)
                {
                    builder.addCapability(spv::CapabilityComputeDerivativeGroupQuadsKHR);
                    // It is technically not in spec to just assume this, since subgroup lane mapping
                    // to local invocation index is not defined without this.
                    // In practice on NV, this holds based on our testing.
                    builder.addExecutionMode(spirv_module.get_entry_function(),
                                             spv::ExecutionModeDerivativeGroupQuadsKHR);
                }
                else
                {
                    builder.addCapability(spv::CapabilityComputeDerivativeGroupLinearKHR);
                    // It is technically not in spec to just assume this, since subgroup lane mapping
                    // to local invocation index is not defined without this.
                    // In practice on NV, this holds based on our testing.
                    builder.addExecutionMode(spirv_module.get_entry_function(),
                                             spv::ExecutionModeDerivativeGroupLinearKHR);
                }
            }

            // If the X and Y dimensions align with 2,
            // we need to assume that any quad op works on a 2D dispatch.
            execution_mode_meta.synthesize_2d_quad_dispatch =
                !options.compute_shader_derivatives_quad && derivatives_2d;

            if (execution_mode_meta.synthesize_2d_quad_dispatch)
            {
                threads[0] *= 2;
                threads[1] /= 2;
            }
        }
        else
        {
            // DXC is robust against this case.
            // Derivatives become meaningless now, so we have to fake the results.
            execution_mode_meta.synthesize_dummy_derivatives = true;
            LOGW("Invalid use of compute shader derivatives detected. Falling back to robust results.\n");
        }
    }

    for (unsigned dim = 0; dim < 3; dim++)
        execution_mode_meta.workgroup_threads[dim] = threads[dim];

    if (execution_model_lib_target)
    {
        threads[0] = builder.makeUintConstant(threads[0], true);
        threads[1] = builder.makeUintConstant(threads[1], true);
        threads[2] = builder.makeUintConstant(threads[2], true);
        builder.addDecoration(threads[0], spv::DecorationSpecId, NodeSpecIdGroupSizeX);
        builder.addDecoration(threads[1], spv::DecorationSpecId, NodeSpecIdGroupSizeY);
        builder.addDecoration(threads[2], spv::DecorationSpecId, NodeSpecIdGroupSizeZ);
        builder.addExecutionModeId(spirv_module.get_entry_function(), spv::ExecutionModeLocalSizeId,
                                   threads[0], threads[1], threads[2]);

        node_input.thread_group_size_id =
            builder.makeCompositeConstant(builder.makeVectorType(builder.makeUintType(32), 3),
                                          { threads[0], threads[1], threads[2] }, true);
        builder.addName(node_input.thread_group_size_id, "ThreadGroupSize");
    }
    else
    {
        builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeLocalSize,
                                 threads[0], threads[1], threads[2]);
    }

    return true;
}

bool Converter::Impl::emit_execution_modes_amplification()
{
    auto &builder = spirv_module.get_builder();
    builder.addExtension("SPV_EXT_mesh_shader");
    builder.addCapability(spv::CapabilityMeshShadingEXT);

    auto *as_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ASState);
    if (as_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*as_state_node);
        auto *num_threads = llvm::cast<llvm::MDNode>(arguments->getOperand(0));
        return emit_execution_modes_thread_wave_properties(num_threads);
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_mesh()
{
    auto &builder = spirv_module.get_builder();
    auto *func = spirv_module.get_entry_function();
    builder.addExtension("SPV_EXT_mesh_shader");
    builder.addCapability(spv::CapabilityMeshShadingEXT);

    auto *ms_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::MSState);
    if (ms_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*ms_state_node);
        unsigned max_vertex_count = get_constant_metadata(arguments, 1);
        unsigned max_primitive_count = get_constant_metadata(arguments, 2);
        auto topology = static_cast<DXIL::MeshOutputTopology>(get_constant_metadata(arguments, 3));
        unsigned index_count;

        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, std::max<unsigned>(1, max_vertex_count));
        builder.addExecutionMode(func, spv::ExecutionModeOutputPrimitivesEXT,
                                 std::max<unsigned>(1, max_primitive_count));

        switch (topology)
        {
        case DXIL::MeshOutputTopology::Undefined:
            index_count = 0;
            break;
        case DXIL::MeshOutputTopology::Line:
            builder.addExecutionMode(func, spv::ExecutionModeOutputLinesEXT);
            index_count = 2;
            break;

        case DXIL::MeshOutputTopology::Triangle:
            builder.addExecutionMode(func, spv::ExecutionModeOutputTrianglesEXT);
            index_count = 3;
            break;

        default:
            LOGE("Unexpected mesh output topology (%u).\n", unsigned(topology));
            return false;
        }

        execution_mode_meta.stage_output_num_vertex = max_vertex_count;
        execution_mode_meta.stage_output_num_primitive = max_primitive_count;
        execution_mode_meta.primitive_index_dimension = index_count;

        auto *num_threads = llvm::cast<llvm::MDNode>(arguments->getOperand(0));
        return emit_execution_modes_thread_wave_properties(num_threads);
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_fp_denorm_rounding()
{
    // Check for SM 6.2 denorm handling. Only applies to FP32.
    auto *func = get_entry_point_function(entry_point_meta);
    if (!func)
        return true;

    // NVIDIA hack. The way the driver exposes float controls is very unfortunate.
    // If only partial denorm support is exposed, assume we cannot freely control FP32 behavior either.
    // However, for SM 6.2+, we have to force it on NVIDIA, even if the driver doesn't actually expose it.
    bool supports_full_denorm_control_fp32 =
        options.supports_float16_denorm_preserve && options.supports_float64_denorm_preserve;

    // Plain DXIL only supports fp32-denorm-mode, the rest are internal extensions.
    const struct
    {
        const char *tag;
        int bits;
        bool supported;
    } denorms[] = {
        { "dxbc-fp16-denorm-mode", 16, options.supports_float16_denorm_preserve },
        { "dxbc-fp32-denorm-mode", 32, supports_full_denorm_control_fp32 },
        { "dxbc-fp64-denorm-mode", 64, options.supports_float64_denorm_preserve },
        { "fp32-denorm-mode", 32, true },
    };

    // For whatever reason, NVIDIA loses a tremendous amount of performance from setting rounding modes.
    // Just ignore it since it's always RTE in practice anyway.
#if 0
    static const struct
    {
        const char *tag;
        int bits;
    } rounding[] = {
        { "dxbc-fp16-round-mode", 16 },
        { "dxbc-fp32-round-mode", 32 },
        { "dxbc-fp64-round-mode", 64 },
    };
#endif

    for (auto &d : denorms)
    {
        if (!d.supported)
            continue;

        auto attr = func->getFnAttribute(d.tag);
        auto str = attr.getValueAsString();
        if (str == "ftz")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityDenormFlushToZero);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormFlushToZero, d.bits);
        }
        else if (str == "preserve")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityDenormPreserve);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, d.bits);
        }
    }

#if 0
    for (auto &r : rounding)
    {
        auto attr = func->getFnAttribute(r.tag);
        auto str = attr.getValueAsString();
        if (str == "rtz")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityRoundingModeRTZ);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTZ, r.bits);
        }
        else if (str == "rte")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityRoundingModeRTE);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTE, r.bits);
        }
    }
#endif

    if (shader_analysis.require_wmma && GlobalConfiguration::get().wmma_rdna3_workaround)
    {
        // FP16 RTZ allows faster conversions on AMD.
        // This hack only makes sense on RDNA3.
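        // The three calls below boil down to roughly this SPIR-V (illustrative):
        //   OpCapability RoundingModeRTZ
        //   OpExecutionMode %main RoundingModeRTZ 16
        // plus the SPV_KHR_float_controls extension declaration.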
        builder().addExtension("SPV_KHR_float_controls");
        builder().addCapability(spv::CapabilityRoundingModeRTZ);
        builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTZ, 16);
    }

    return true;
}

bool Converter::Impl::analyze_execution_modes_meta()
{
    auto *meta = entry_point_meta;
    if (execution_model_lib_target)
        if (auto *null_meta = get_null_entry_point_meta(bitcode_parser.get_module()))
            meta = null_meta;

    auto flags = get_shader_flags(meta);
    execution_mode_meta.native_16bit_operations = (flags & DXIL::ShaderFlagNativeLowPrecision) != 0;
    return true;
}

void Converter::Impl::emit_execution_modes_post_code_generation()
{
    auto &b = builder();

    if (module_is_dxilconv(bitcode_parser.get_module()))
    {
        // We should use these globally, but we don't want to invalidate all Fossilize archives just yet.
        // Shader instrumentation may declare its own preservation modes, so only declare execution modes
        // if we haven't done anything.
        if (!b.hasCapability(spv::CapabilitySignedZeroInfNanPreserve))
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilitySignedZeroInfNanPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 32);
            if (b.hasCapability(spv::CapabilityFloat64))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 64);
        }

        // DXBC assumes flush-to-zero, but dxilconv doesn't explicitly emit that, since it's not in SM 6.0.
        if (!b.hasCapability(spv::CapabilityDenormFlushToZero) && !b.hasCapability(spv::CapabilityDenormPreserve))
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormFlushToZero);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormFlushToZero, 32);
        }
    }

    // Custom IR is expected to set this with extended attributes.
    if (!module_is_dxbc_spirv(bitcode_parser.get_module()))
    {
        // Float16 and Float64 require denorms to be preserved in D3D12.
        if (b.hasCapability(spv::CapabilityFloat16) && options.supports_float16_denorm_preserve)
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, 16);
        }

        if (b.hasCapability(spv::CapabilityFloat64) && options.supports_float64_denorm_preserve)
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, 64);
        }
    }
    else
    {
        // If instrumentation didn't add these already.
        if (!b.hasCapability(spv::CapabilitySignedZeroInfNanPreserve))
        {
            // Set SignedZeroInfNanPreserve by default for new IR.
            // We should use these globally, but we don't want to invalidate all Fossilize archives just yet.
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilitySignedZeroInfNanPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 32);
            if (b.hasCapability(spv::CapabilityFloat16))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 16);
            if (b.hasCapability(spv::CapabilityFloat64))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 64);
        }
    }

    // Opt into quad derivatives and maximal reconvergence for fragment shaders using
    // QuadAll/QuadAny intrinsics to get meaningful behaviour for quad-uniform control
    // flow; other quad ops are ignored for now.
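    // For example (illustrative HLSL, not from any test here):
    //   if (QuadAny(needs_sample))
    //       color = tex.Sample(samp, uv);
    // QuadDerivativesKHR gives this pattern defined derivatives under quad-uniform control flow,
    // and RequireFullQuadsKHR guarantees fully populated quads so QuadAny/QuadAll see all four lanes.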
    if (options.supports_quad_control && execution_model == spv::ExecutionModelFragment &&
        execution_mode_meta.needs_quad_derivatives)
    {
        b.addExtension("SPV_KHR_quad_control");
        b.addCapability(spv::CapabilityQuadControlKHR);
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRequireFullQuadsKHR);
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeQuadDerivativesKHR);
    }

    if (options.supports_maximal_reconvergence &&
        (options.force_maximal_reconvergence || execution_mode_meta.waveops_include_helper_lanes ||
         execution_mode_meta.needs_quad_derivatives || shader_analysis.need_maximal_reconvergence_helper_call))
    {
        b.addExtension("SPV_KHR_maximal_reconvergence");
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeMaximallyReconvergesKHR);
    }
}

bool Converter::Impl::emit_execution_modes_late()
{
    switch (execution_model)
    {
    case spv::ExecutionModelFragment:
        if (!emit_execution_modes_pixel_late())
            return false;
        break;

    default:
        break;
    }

    return true;
}

bool Converter::Impl::emit_execution_modes()
{
    switch (execution_model)
    {
    case spv::ExecutionModelGLCompute:
        if (execution_model_lib_target)
        {
            if (!emit_execution_modes_node())
                return false;
        }
        else
        {
            if (!emit_execution_modes_compute())
                return false;
        }
        break;

    case spv::ExecutionModelGeometry:
        if (!emit_execution_modes_geometry())
            return false;
        break;

    case spv::ExecutionModelTessellationControl:
        if (!emit_execution_modes_hull())
            return false;
        break;

    case spv::ExecutionModelTessellationEvaluation:
        if (!emit_execution_modes_domain())
            return false;
        break;

    case spv::ExecutionModelFragment:
        if (!emit_execution_modes_pixel())
            return false;
        break;

    case spv::ExecutionModelRayGenerationKHR:
    case spv::ExecutionModelMissKHR:
    case spv::ExecutionModelIntersectionKHR:
    case spv::ExecutionModelAnyHitKHR:
    case spv::ExecutionModelCallableKHR:
    case spv::ExecutionModelClosestHitKHR:
        if (!emit_execution_modes_ray_tracing(execution_model))
            return false;
        break;

    case spv::ExecutionModelTaskEXT:
        if (!emit_execution_modes_amplification())
            return false;
        break;

    case spv::ExecutionModelMeshEXT:
        if (!emit_execution_modes_mesh())
            return false;
        break;

    default:
        break;
    }

    if (!emit_execution_modes_fp_denorm_rounding())
        return false;

    return true;
}

ConvertedFunction::Function Converter::Impl::build_rov_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                            CFGNodePool &pool,
                                                            Vector<ConvertedFunction::Function> &leaves)
{
    auto *code_main = convert_function(visit_order, true);

    // Need to figure out if our ROV use is trivial. If not, we will wrap the entire function in ROV pairs.
    CFGStructurizer cfg{code_main, pool, spirv_module};
    bool trivial_rewrite = cfg.rewrite_rov_lock_region();
    if (trivial_rewrite)
        return { code_main, spirv_module.get_entry_function() };

    // If we need to fall back, we need a wrapper function. Replace the entry point.
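    // The rewritten entry point then looks roughly like this (illustrative):
    //   void main()
    //   {
    //       OpBeginInvocationInterlockEXT
    //       code_main();
    //       OpEndInvocationInterlockEXT
    //   }
    // so the entire original shader body executes inside one interlock region.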
    spv::Block *code_entry;
    auto *code_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "code_main", {}, {}, &code_entry);
    code_func->moveLocalDeclarationsFrom(spirv_module.get_entry_function());

    auto *entry = pool.create_node();
    entry->ir.operations.push_back(allocate(spv::OpBeginInvocationInterlockEXT));
    auto *call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
    call_op->add_id(code_func->getId());
    entry->ir.operations.push_back(call_op);
    entry->ir.operations.push_back(allocate(spv::OpEndInvocationInterlockEXT));
    entry->ir.terminator.type = Terminator::Type::Return;

    leaves.push_back({ code_main, code_func });
    return { entry, spirv_module.get_entry_function() };
}

ConvertedFunction::Function Converter::Impl::build_node_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                             CFGNodePool &pool,
                                                             Vector<ConvertedFunction::Function> &leaves)
{
    spv::Block *node_entry;
    auto *node_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "node_main", {}, {}, &node_entry);

    // Set build point so alloca() functions can create variables correctly.
    builder().setBuildPoint(node_entry);
    auto *node_main = convert_function(visit_order, true);
    leaves.push_back({ node_main, node_func });

    auto *entry = pool.create_node();
    current_block = &entry->ir.operations;
    entry->ir.terminator.type = Terminator::Type::Return;
    if (!emit_workgraph_dispatcher(*this, pool, entry, node_func->getId()))
        return {};

    return { entry, spirv_module.get_entry_function() };
}

void Converter::Impl::emit_patch_output_lowering(CFGNode *bb)
{
    auto *node = entry_point_meta;
    current_block = &bb->ir.operations;

    assert(node->getOperand(2));
    auto &signature = node->getOperand(2);
    auto *signature_node = llvm::cast<llvm::MDNode>(signature);
    auto &patch_variables = signature_node->getOperand(2);
    if (!patch_variables)
        return;

    auto *patch_node = llvm::dyn_cast<llvm::MDNode>(patch_variables);
    spv::Id u32_type = builder().makeUintType(32);
    spv::Id uvec4_type = builder().makeVectorType(u32_type, 4);

    for (unsigned i = 0; i < patch_node->getNumOperands(); i++)
    {
        auto *patch = llvm::cast<llvm::MDNode>(patch_node->getOperand(i));
        auto element_id = get_constant_metadata(patch, 0);
        auto actual_element_type =
            normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(patch, 2)));
        auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(patch, 3));
        if (system_value != DXIL::Semantic::User)
            continue;

        auto rows = get_constant_metadata(patch, 6);
        auto cols = get_constant_metadata(patch, 7);
        auto start_row = get_constant_metadata(patch, 8);
        auto start_col = get_constant_metadata(patch, 9);

        auto &meta = patch_elements_meta[element_id];
        assert(meta.id);

        for (unsigned row = 0; row < rows; row++)
        {
            auto *chain = allocate(spv::OpAccessChain, builder().makePointer(spv::StorageClassPrivate, uvec4_type));
            chain->add_id(execution_mode_meta.patch_lowering_array_var_id);
            chain->add_id(builder().makeUintConstant(row + start_row));
            add(chain);

            auto *load_op = allocate(spv::OpLoad, uvec4_type);
            load_op->add_id(chain->id);
            add(load_op);

            spv::Id store_id;
            if (cols == 4)
            {
                store_id = load_op->id;
            }
            else if (cols > 1)
            {
                auto *shuffle_op = allocate(spv::OpVectorShuffle, get_type_id(DXIL::ComponentType::U32, 1, cols));
                shuffle_op->add_id(load_op->id);
                shuffle_op->add_id(load_op->id);
                for (unsigned c = 0; c < cols; c++)
                    shuffle_op->add_literal(c + start_col);
                add(shuffle_op);
                store_id = shuffle_op->id;
            }
            else
            {
                auto *extract_op = allocate(spv::OpCompositeExtract, u32_type);
                extract_op->add_id(load_op->id);
                extract_op->add_literal(start_col);
                add(extract_op);
                store_id = extract_op->id;
            }

            if (actual_element_type != DXIL::ComponentType::U32)
            {
                auto *cast = allocate(spv::OpBitcast, get_type_id(actual_element_type, 1, cols));
                cast->add_id(store_id);
                add(cast);
                store_id = cast->id;
            }

            auto *store_op = allocate(spv::OpStore);
            if (rows > 1)
            {
                auto *store_chain = allocate(
                    spv::OpAccessChain,
                    builder().makePointer(spv::StorageClassOutput, get_type_id(actual_element_type, 1, cols)));
                store_chain->add_id(meta.id);
                store_chain->add_id(builder().makeUintConstant(row));
                add(store_chain);
                store_op->add_id(store_chain->id);
            }
            else
            {
                store_op->add_id(meta.id);
            }

            store_op->add_id(store_id);
            add(store_op);
        }
    }
}

CFGNode *Converter::Impl::build_hull_passthrough_function(CFGNodePool &pool)
{
    // A hull shader may have a null main entry, which indicates that a default passthrough function
    // should be invoked to copy all inputs to the corresponding outputs.
    auto *entry = pool.create_node();

    auto &signature = entry_point_meta->getOperand(2);
    if (!signature)
        return {};
    auto *signature_node = llvm::cast<llvm::MDNode>(signature);

    auto &inputs = signature_node->getOperand(0);
    if (!inputs)
        return {};
    auto *inputs_node = llvm::dyn_cast<llvm::MDNode>(inputs);

    auto &outputs = signature_node->getOperand(1);
    if (!outputs)
        return {};
    auto *outputs_node = llvm::dyn_cast<llvm::MDNode>(outputs);

    if (!inputs_node || !outputs_node)
        return {};

    auto &builder = spirv_module.get_builder();

    // InvocationId is the control point ID used to index into the input arrays.
    auto *load_cipd_op = allocate(spv::OpLoad, builder.makeUintType(32));
    load_cipd_op->add_id(spirv_module.get_builtin_shader_input(spv::BuiltInInvocationId));
    entry->ir.operations.push_back(load_cipd_op);

    unsigned num_entries = std::min(inputs_node->getNumOperands(), outputs_node->getNumOperands());

    // It's a little unclear if we should match by meta entry order, or by row/col.
    // Without any test to prove otherwise, keep it simple.
    for (unsigned i = 0; i < num_entries; i++)
    {
        auto *input = llvm::cast<llvm::MDNode>(inputs_node->getOperand(i));
        auto element_id = get_constant_metadata(input, 0);
        auto actual_element_type =
            normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(input, 2)));
        auto effective_element_type = get_effective_input_output_type(actual_element_type);
        auto rows = get_constant_metadata(input, 6);
        auto cols = get_constant_metadata(input, 7);
        auto type_id = get_type_id(effective_element_type, rows, cols);

        auto *input_chain = allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassInput, type_id));
        input_chain->add_id(input_elements_meta[element_id].id);
        input_chain->add_id(load_cipd_op->id);
        entry->ir.operations.push_back(input_chain);

        auto *load_op = allocate(spv::OpLoad, type_id);
        load_op->add_id(input_chain->id);
        entry->ir.operations.push_back(load_op);

        auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
        element_id = get_constant_metadata(output, 0);

        auto *output_chain = allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassOutput, type_id));
        output_chain->add_id(output_elements_meta[element_id].id);
        output_chain->add_id(load_cipd_op->id);
        entry->ir.operations.push_back(output_chain);

        auto *store_op = allocate(spv::OpStore);
        store_op->add_id(output_chain->id);
        store_op->add_id(load_op->id);
        entry->ir.operations.push_back(store_op);
    }

    entry->ir.terminator.type = Terminator::Type::Return;
    return entry;
}

ConvertedFunction::Function Converter::Impl::build_hull_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                             const Vector<llvm::BasicBlock *> &patch_visit_order,
                                                             CFGNodePool &pool,
                                                             Vector<ConvertedFunction::Function> &leaves)
{
    // Just make sure there is an entry block already created.
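    // The synthesized dispatcher built below is shaped roughly like this (illustrative):
    //   void main()
    //   {
    //       hull_main();                   // Once per control point invocation.
    //       ControlBarrier(Workgroup);     // Only with more than one output control point.
    //       if (InvocationId == 0)
    //           patch_main();              // Patch constants run once per patch.
    //   }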
    spv::Block *hull_entry = nullptr, *patch_entry = nullptr;
    auto *hull_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "hull_main", {}, {}, &hull_entry);
    auto *patch_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                   "patch_main", {}, {}, &patch_entry);

    // Set build point so alloca() functions can create variables correctly.
    if (hull_entry)
        builder().setBuildPoint(hull_entry);

    CFGNode *hull_main = nullptr;
    if (!visit_order.empty())
        hull_main = convert_function(visit_order, true);
    else
        hull_main = build_hull_passthrough_function(pool);

    builder().setBuildPoint(patch_entry);
    auto *patch_main = convert_function(patch_visit_order, false);
    builder().setBuildPoint(spirv_module.get_entry_function()->getEntryBlock());

    if (hull_main)
        leaves.push_back({ hull_main, hull_func });
    leaves.push_back({ patch_main, patch_func });

    auto *entry = pool.create_node();
    Operation *call_op;

    if (hull_func)
    {
        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(hull_func->getId());
        entry->ir.operations.push_back(call_op);
    }

    if (execution_mode_meta.stage_output_num_vertex > 1)
    {
        auto *load_op = allocate(spv::OpLoad, builder().makeUintType(32));
        load_op->add_id(spirv_module.get_builtin_shader_input(spv::BuiltInInvocationId));
        entry->ir.operations.push_back(load_op);

        auto *cmp_op = allocate(spv::OpIEqual, builder().makeBoolType());
        cmp_op->add_ids({ load_op->id, builder().makeUintConstant(0) });
        entry->ir.operations.push_back(cmp_op);

        if (hull_main)
        {
            auto *barrier_op = allocate(spv::OpControlBarrier);
            // Not 100% sure what to emit here. Just do what glslang does.
            barrier_op->add_id(builder().makeUintConstant(spv::ScopeWorkgroup));
            if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
            {
                barrier_op->add_id(builder().makeUintConstant(spv::ScopeWorkgroup));
                barrier_op->add_id(builder().makeUintConstant(
                    spv::MemorySemanticsOutputMemoryMask | spv::MemorySemanticsAcquireReleaseMask));
            }
            else
            {
                barrier_op->add_id(builder().makeUintConstant(spv::ScopeInvocation));
                barrier_op->add_id(builder().makeUintConstant(0));
            }
            entry->ir.operations.push_back(barrier_op);
        }

        auto *patch_block = pool.create_node();
        auto *merge_block = pool.create_node();
        entry->add_branch(patch_block);
        entry->add_branch(merge_block);
        patch_block->add_branch(merge_block);

        entry->ir.terminator.type = Terminator::Type::Condition;
        entry->ir.terminator.true_block = patch_block;
        entry->ir.terminator.false_block = merge_block;
        entry->ir.terminator.conditional_id = cmp_op->id;

        patch_block->ir.terminator.type = Terminator::Type::Branch;
        patch_block->ir.terminator.direct_block = merge_block;

        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(patch_func->getId());
        patch_block->ir.operations.push_back(call_op);
        if (execution_mode_meta.patch_lowering_array_var_id)
            emit_patch_output_lowering(patch_block);

        merge_block->ir.terminator.type = Terminator::Type::Return;
    }
    else
    {
        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(patch_func->getId());
        entry->ir.operations.push_back(call_op);
        entry->ir.terminator.type = Terminator::Type::Return;
        if (execution_mode_meta.patch_lowering_array_var_id)
            emit_patch_output_lowering(entry);
    }

    return { entry, spirv_module.get_entry_function() };
}

void Converter::Impl::build_function_bb_visit_order_inner_analysis(Vector<llvm::BasicBlock *> &bbs,
                                                                   UnorderedSet<llvm::BasicBlock *> &visited,
                                                                   llvm::BasicBlock *bb)
{
    if (visited.count(bb))
        return;
    visited.insert(bb);

    // Check for the special case where we optimize to a direct branch.
    auto *term = bb->getTerminator();
    if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(term))
    {
        if (inst->isConditional())
        {
            bool cond_value;
            if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
            {
                auto *succ = inst->getSuccessor(cond_value ? 0 : 1);
                build_function_bb_visit_order_inner_analysis(bbs, visited, succ);
                bbs.push_back(bb);
                return;
            }
        }
    }

    for (auto itr = llvm::succ_begin(bb); itr != llvm::succ_end(bb); ++itr)
    {
        auto *succ = *itr;
        build_function_bb_visit_order_inner_analysis(bbs, visited, succ);
    }

    bbs.push_back(bb);
}

Vector<llvm::BasicBlock *> Converter::Impl::build_function_bb_visit_order_analysis(llvm::Function *func)
{
    if (!func)
        return {};

    UnorderedSet<llvm::BasicBlock *> visited;
    Vector<llvm::BasicBlock *> visit_order;
    auto *entry = &func->getEntryBlock();
    build_function_bb_visit_order_inner_analysis(visit_order, visited, entry);

    // Get the natural traversal order; the input is post-order.
    std::reverse(visit_order.begin(), visit_order.end());
    return visit_order;
}

void Converter::Impl::build_function_bb_visit_register(llvm::BasicBlock *bb, CFGNodePool &pool, String tag)
{
    auto entry_meta = std::make_unique<BlockMeta>(bb);
    bb_map[bb] = entry_meta.get();
    auto *entry_node = pool.create_node();
    bb_map[bb]->node = entry_node;
    entry_node->name = std::move(tag);
    metas.push_back(std::move(entry_meta));
}

// This only exists so that we can avoid nuking all existing Fossilize caches with completely new shaders.
// This traversal order is not a perfect reverse post-order traversal,
// so we cannot use it for analysis with alloca() -> CBV forwarding checks.
// Once we are ready to consider doing large scale SPIR-V changes that invalidate all caches anyway,
// we might as well get rid of this path in the same update and use the common analysis path.
Vector<llvm::BasicBlock *> Converter::Impl::build_function_bb_visit_order_legacy(llvm::Function *func,
                                                                                 CFGNodePool &pool)
{
    if (!func)
        return {};

    auto *entry = &func->getEntryBlock();
    build_function_bb_visit_register(entry, pool, ".entry");

    Vector<llvm::BasicBlock *> to_process;
    Vector<llvm::BasicBlock *> processing;
    to_process.push_back(entry);
    Vector<llvm::BasicBlock *> visit_order;
    unsigned fake_label_id = 0;

    const auto queue_visit_succ = [&](llvm::BasicBlock *block, llvm::BasicBlock *succ) {
        if (!bb_map.count(succ))
        {
            to_process.push_back(succ);
            build_function_bb_visit_register(succ, pool, dxil_spv::to_string(++fake_label_id));
        }
        bb_map[block]->node->add_branch(bb_map[succ]->node);
    };

    // Traverse the CFG and register all blocks in the pool.
    while (!to_process.empty())
    {
        std::swap(to_process, processing);
        for (auto *block : processing)
        {
            visit_order.push_back(block);
            auto *term = block->getTerminator();
            if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(term))
            {
                if (inst->isConditional())
                {
                    bool cond_value;
                    if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
                    {
                        auto *succ = inst->getSuccessor(cond_value ? 0 : 1);
                        queue_visit_succ(block, succ);
                        continue;
                    }
                }
            }

            for (auto itr = llvm::succ_begin(block); itr != llvm::succ_end(block); ++itr)
                queue_visit_succ(block, *itr);
        }
        processing.clear();
    }

    return visit_order;
}

void Converter::Impl::emit_write_instrumentation_invocation_id(CFGNode *node)
{
    current_block = &node->ir.operations;
    spv::Id alloc_id = spirv_module.get_helper_call_id(HelperCall::AllocateInvocationID);
    auto *call = allocate(spv::OpFunctionCall, builder().makeUintType(32));
    call->add_id(alloc_id);
    add(call);

    auto *store = allocate(spv::OpStore);
    store->add_id(instrumentation.invocation_id_var_id);
    store->add_id(call->id);
    add(store);
}

void Converter::Impl::gather_function_dependencies(llvm::Function *caller, Vector<llvm::Function *> &funcs)
{
    if (std::find(funcs.begin(), funcs.end(), caller) != funcs.end())
        return;

    // Avoid exponential explosion while traversing.
    funcs.push_back(caller);

    for (auto &bb : *caller)
    {
        for (auto &inst : bb)
        {
            if (const auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *fn = call_inst->getCalledFunction();
                if (strncmp(fn->getName().data(), "dx.op", 5) != 0 &&
                    strncmp(fn->getName().data(), "llvm.", 5) != 0)
                {
                    gather_function_dependencies(fn, funcs);
                }
            }
        }
    }

    // Ensure leaves come before their caller.
    funcs.erase(std::find(funcs.begin(), funcs.end(), caller));
    funcs.push_back(caller);
}

bool Converter::Impl::build_callee_functions(CFGNodePool &pool, const Vector<llvm::Function *> &callees,
                                             Vector<ConvertedFunction::Function> &leaves)
{
    llvm::Function *func = get_entry_point_function(entry_point_meta);

    for (auto *leaf_func : callees)
    {
        if (leaf_func == func || leaf_func == execution_mode_meta.patch_constant_function)
            continue;

        Vector<spv::Id> arg_types;
        spv::Block *spv_entry;

        // Cannot safely use function-local undefs now.
        shader_analysis.global_undefs = true;

        arg_types.reserve(leaf_func->getFunctionType()->getNumParams());
        for (uint32_t i = 0; i < leaf_func->getFunctionType()->getNumParams(); i++)
            arg_types.push_back(get_type_id(leaf_func->getFunctionType()->getParamType(i)));

        auto *spv_func = builder().makeFunctionEntry(spv::NoPrecision,
                                                     get_type_id(leaf_func->getFunctionType()->getReturnType()),
#ifdef HAVE_LLVMBC
                                                     leaf_func->getName().c_str(),
#else
                                                     leaf_func->getName().str().c_str(),
#endif
                                                     arg_types, {}, &spv_entry);

        rewrite_value(leaf_func, spv_func->getId());

        auto arg_iter = leaf_func->arg_begin();
        for (uint32_t i = 0, n = leaf_func->getFunctionType()->getNumParams(); i < n; i++, ++arg_iter)
            rewrite_value(&*arg_iter, spv_func->getParamId(i));

        auto visit_order = build_function_bb_visit_order_analysis(leaf_func);
        for (auto *bb : visit_order)
            build_function_bb_visit_register(bb, pool, "");
        for (auto *bb : visit_order)
            for (auto itr = llvm::succ_begin(bb); itr != llvm::succ_end(bb); ++itr)
                bb_map[bb]->node->add_branch(bb_map[*itr]->node);

        builder().setBuildPoint(spv_entry);
        auto *entry = convert_function(visit_order, false);
        if (!entry)
            return false;
        leaves.push_back({ entry, spv_func });
    }

    builder().setBuildPoint(spirv_module.get_entry_function()->getEntryBlock());
    return true;
}

CFGNode *Converter::Impl::convert_function(const Vector<llvm::BasicBlock *> &visit_order, bool primary_code)
{
    bool has_partial_unroll = false;

    for (auto *bb : visit_order)
    {
        auto *meta = bb_map[bb];
        CFGNode *node = meta->node;
        combined_image_sampler_cache.clear();
        peephole_transformation_cache.clear();
        memoized = {};

        if (bb == visit_order.front())
        {
            current_block = &node->ir.operations;
            if (!emit_view_masking(*this))
                return {};
            if (!emit_view_instancing_fixed_layer_viewport(*this, true))
                return {};
            if (instrumentation.invocation_id_var_id && primary_code)
                emit_write_instrumentation_invocation_id(node);
        }

        auto sink_itr = bb_to_sinks.find(bb);
        if (sink_itr != bb_to_sinks.end())
        {
            for (auto *instruction : sink_itr->second)
            {
                auto itr = value_map.find(instruction);
                if (itr != value_map.end())
                    value_map.erase(itr);

                if (!emit_instruction(node, *instruction))
                {
                    LOGE("Failed to emit instruction.\n");
                    return {};
                }
            }
        }

        // Scan opcodes.
        for (auto &instruction : *bb)
        {
            if (!emit_instruction(node, instruction))
            {
                LOGE("Failed to emit instruction.\n");
                return {};
            }
        }

        ags.reset();
        nvapi.reset();

        // We don't know if the block is a loop yet, so just tag every BB.
        // CFG will propagate the information as necessary.
        node->ir.terminator.force_flatten = options.branch_control.force_flatten;
        node->ir.terminator.force_branch = options.branch_control.force_branch;
        node->ir.terminator.force_unroll = options.branch_control.force_unroll;
        node->ir.terminator.force_loop = options.branch_control.force_loop;

        auto *instruction = bb->getTerminator();
        if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(instruction))
        {
            // Loop information is attached to the back edge in LLVM.
            // Continue blocks can be direct branches or conditional ones, so make it generic.
            auto *loop_meta = instruction->getMetadata("llvm.loop");
            if (loop_meta && loop_meta->getNumOperands() >= 2)
            {
                auto *meta_node = llvm::dyn_cast<llvm::MDNode>(loop_meta->getOperand(1));
                if (meta_node)
                {
                    auto *meta_name = llvm::dyn_cast<llvm::MDString>(meta_node->getOperand(0));
                    if (meta_name)
                    {
#ifdef HAVE_LLVMBC
                        auto &str = meta_name->getString();
#else
                        auto str = meta_name->getString();
#endif
                        if (options.branch_control.use_shader_metadata)
                        {
                            if (str == "llvm.loop.unroll.disable")
                            {
                                node->ir.terminator.force_loop = true;
                                node->ir.terminator.force_unroll = false;
                            }
                            else if (str == "llvm.loop.unroll.full")
                            {
                                node->ir.terminator.force_unroll = true;
                                node->ir.terminator.force_loop = false;
                            }
                        }

                        if (str == "llvm.loop.unroll.count")
                            has_partial_unroll = true;
                    }
                }
            }

            if (inst->isConditional())
            {
                // Works around some pathological unrolling scenarios where games may unroll
                // based on WaveGetLaneCount().
                bool cond_value;
                if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
                {
                    node->ir.terminator.type = Terminator::Type::Branch;
                    node->ir.terminator.direct_block = bb_map[inst->getSuccessor(cond_value ? 0 : 1)]->node;
                }
                else
                {
                    node->ir.terminator.type = Terminator::Type::Condition;
                    node->ir.terminator.conditional_id = get_id_for_value(inst->getCondition());
                    assert(inst->getNumSuccessors() == 2);
                    node->ir.terminator.true_block = bb_map[inst->getSuccessor(0)]->node;
                    node->ir.terminator.false_block = bb_map[inst->getSuccessor(1)]->node;

                    if (options.branch_control.use_shader_metadata)
                    {
                        auto *branch_meta = inst->getMetadata("dx.controlflow.hints");
                        if (branch_meta && branch_meta->getNumOperands() >= 3)
                        {
                            if (get_constant_metadata(branch_meta, 2) == 1)
                            {
                                node->ir.terminator.force_branch = true;
                                node->ir.terminator.force_flatten = false;
                            }
                            else if (get_constant_metadata(branch_meta, 2) == 2)
                            {
                                node->ir.terminator.force_flatten = true;
                                node->ir.terminator.force_branch = false;
                            }
                        }
                    }
                }
            }
            else
            {
                node->ir.terminator.type = Terminator::Type::Branch;
                assert(inst->getNumSuccessors() == 1);
                node->ir.terminator.direct_block = bb_map[inst->getSuccessor(0)]->node;

                // If the shader uses partial unrolling, but we see loops anyway,
                // it's very likely we really want this to be a loop.
                // This is somewhat of a hack heuristic to work around a Mesa bug in Lords of the Fallen,
                // but it makes at least some sense ...
                if (has_partial_unroll)
                    node->ir.terminator.force_loop = true;
            }
        }
        else if (auto *inst = llvm::dyn_cast<llvm::SwitchInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Switch;

            Terminator::Case default_case = {};
            default_case.is_default = true;
            default_case.node = bb_map[inst->getDefaultDest()]->node;
            node->ir.terminator.cases.push_back(default_case);
            node->ir.terminator.conditional_id = get_id_for_value(inst->getCondition());

            for (auto itr = inst->case_begin(); itr != inst->case_end(); ++itr)
            {
                Terminator::Case switch_case = {};
                switch_case.node = bb_map[itr->getCaseSuccessor()]->node;
                switch_case.value = uint32_t(itr->getCaseValue()->getUniqueInteger().getZExtValue());
                node->ir.terminator.cases.push_back(switch_case);
            }
        }
        else if (auto *inst = llvm::dyn_cast<llvm::ReturnInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Return;
            if (inst->getReturnValue())
                node->ir.terminator.return_value = get_id_for_value(inst->getReturnValue());
        }
        else if (llvm::isa<llvm::UnreachableInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Unreachable;
        }
        else
        {
            LOGE("Unsupported terminator ...\n");
            return {};
        }

#ifdef HAVE_LLVMBC
        // Forward structured control flow.
        if (bb->get_merge() == llvm::BasicBlock::Merge::Selection)
        {
            node->ir.merge_info.merge_type = MergeType::Selection;
            // Assume both paths can return or break, leaving the merge unreachable.
            if (bb->get_merge_bb() && bb_map.count(bb->get_merge_bb()))
                node->ir.merge_info.merge_block = bb_map[bb->get_merge_bb()]->node;
        }
        else if (bb->get_merge() == llvm::BasicBlock::Merge::Loop)
        {
            node->ir.merge_info.merge_type = MergeType::Loop;
            // In infinite loops, the merge block may be unreachable.
            if (bb->get_merge_bb() && bb_map.count(bb->get_merge_bb()))
                node->ir.merge_info.merge_block = bb_map[bb->get_merge_bb()]->node;
            // If the back edge is not reachable, we'll resolve that later.
            if (bb->get_continue_bb() && bb_map.count(bb->get_continue_bb()))
                node->ir.merge_info.continue_block = bb_map[bb->get_continue_bb()]->node;
        }
#endif
    }

    // Rewrite PHI incoming values if we have to.
    if (!phi_incoming_rewrite.empty())
    {
        for (auto *bb : visit_order)
        {
            CFGNode *node = bb_map[bb]->node;
            for (auto &phi : node->ir.phi)
            {
                for (auto &incoming : phi.incoming)
                {
                    auto itr = phi_incoming_rewrite.find(incoming.id);
                    if (itr != phi_incoming_rewrite.end())
                        incoming.id = itr->second;
                }
            }
        }
    }

    return bb_map[visit_order.front()]->node;
}

void Converter::Impl::mark_used_value(const llvm::Value *value)
{
    if (!llvm::isa<llvm::Constant>(value))
    {
        // Technically, we won't be able to eliminate a chain of SSA expressions
        // which are unused this way, but eeeeeeh. DXC really should handle that.
        // This is to deal with odd-ball edge cases where random single SSA instructions
        // were not eliminated for whatever reason.
        llvm_used_ssa_values.insert(value);
    }
}

void Converter::Impl::mark_used_values(const llvm::Instruction *instruction)
{
    if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(instruction))
    {
        for (unsigned i = 0, n = phi_inst->getNumIncomingValues(); i < n; i++)
        {
            auto *incoming = phi_inst->getIncomingValue(i);
            // Ignore self-referential PHI. Someone else needs to refer to us.
            if (incoming != phi_inst)
                mark_used_value(incoming);
        }
    }
    else if (const auto *ret_inst = llvm::dyn_cast<llvm::ReturnInst>(instruction))
    {
        if (ret_inst->getReturnValue())
            mark_used_value(ret_inst->getReturnValue());
    }
    else if (const auto *cond_inst = llvm::dyn_cast<llvm::BranchInst>(instruction))
    {
        if (cond_inst->isConditional())
            mark_used_value(cond_inst->getCondition());
    }
    else if (const auto *switch_inst = llvm::dyn_cast<llvm::SwitchInst>(instruction))
    {
        mark_used_value(switch_inst->getCondition());
    }
    else
    {
        for (unsigned i = 0, n = instruction->getNumOperands(); i < n; i++)
            mark_used_value(instruction->getOperand(i));
    }
}

static bool instruction_is_precise_sensitive(const llvm::Instruction *value)
{
    if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(value))
    {
        auto opcode = binary_op->getOpcode();
        switch (opcode)
        {
        case llvm::BinaryOperator::BinaryOps::FAdd:
        case llvm::BinaryOperator::BinaryOps::FSub:
        case llvm::BinaryOperator::BinaryOps::FMul:
        case llvm::BinaryOperator::BinaryOps::FDiv:
        case llvm::BinaryOperator::BinaryOps::FRem:
            return true;

        default:
            break;
        }
    }
    else if (value_is_dx_op_instrinsic(value, DXIL::Op::FMad) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot2) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot2AddHalf) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot3) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot4))
    {
        return true;
    }

    return false;
}

static bool instruction_requires_no_contraction(const llvm::Instruction *value)
{
    if (instruction_is_precise_sensitive(value))
    {
        if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(value))
            return !binary_op->isFast();
        else
            return llvm::cast<llvm::CallInst>(value)->hasMetadata("dx.precise");
    }

    return false;
}

static void propagate_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Instruction *value);

static void mark_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Value *value)
{
    // Stop propagating when we hit something that is not an instruction,
    // i.e. a constant or variable (alloca is very rare).
    if (auto *inst = llvm::dyn_cast<llvm::Instruction>(value))
    {
        if (instruction_is_precise_sensitive(inst) && !instruction_requires_no_contraction(inst))
        {
            if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(inst))
                const_cast<llvm::CallInst *>(call_inst)->setMetadata("dx.precise", nullptr);
            else if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(inst))
                const_cast<llvm::BinaryOperator *>(binary_op)->setFast(false);
        }

        propagate_precise(cache, inst);
    }
}

static void propagate_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Instruction *value)
{
    if (cache.count(value) != 0)
        return;
    cache.insert(value);

    if (const auto *phi = llvm::dyn_cast<llvm::PHINode>(value))
    {
        for (unsigned i = 0, n = phi->getNumIncomingValues(); i < n; i++)
            mark_precise(cache, phi->getIncomingValue(i));
    }
    else
    {
        for (unsigned i = 0, n = value->getNumOperands(); i < n; i++)
            mark_precise(cache, value->getOperand(i));
    }
}

static void propagate_precise(llvm::Function *func)
{
    Vector<const llvm::Instruction *> precise_instructions;
    for (auto &bb : *func)
        for (auto &inst : bb)
            if (instruction_requires_no_contraction(&inst))
                precise_instructions.push_back(&inst);

    UnorderedSet<const llvm::Instruction *> visitation_cache;
    for (auto *inst : precise_instructions)
        propagate_precise(visitation_cache, inst);
}

void Converter::Impl::analyze_instructions_post_execution_modes()
{
    if ((options.quirks.group_shared_auto_barrier || !shader_analysis.has_group_shared_barrier) &&
        shader_analysis.has_group_shared_access)
    {
        unsigned num_threads = execution_mode_meta.workgroup_threads[0] *
                               execution_mode_meta.workgroup_threads[1] *
                               execution_mode_meta.workgroup_threads[2];
        if (options.quirks.group_shared_auto_barrier || (num_threads <= 32 && num_threads > 1))
        {
            // This is a case that might just happen to work if the game assumes lock-step execution
            // on NV + AMD (rip Intel).
            // If the group size is larger, it's extremely unlikely the game "just works"
            // by chance on native drivers.
            // Some shaders seem to use groupshared as a sort of "scratch space" per thread,
            // which is a valid use case and does not require barriers to be correct.
            shader_analysis.needs_auto_group_shared_barriers = true;
        }
    }
}

bool Converter::Impl::analyze_instructions(llvm::Function *func)
{
    // Need to analyze this in two stages.
    // In the first stage, we need to analyze:
    // - Load/GetElementPtr to handle lib global variables
    // - CreateHandle family to build LLVM access handles
    // - ExtractValue to track which components are used for BufferLoad.
    // In the second phase we analyze the buffer loads and stores and figure out
    // alignments of the loads and stores. This lets us build up a list of SSBO declarations we need to
    // optimally implement the loads and stores. We need to do this late, because we depend on results
    // of the ExtractValue analysis.

    if (func && options.propagate_precise && !options.force_precise)
        propagate_precise(func);

    auto visit_order = build_function_bb_visit_order_analysis(func);

    for (auto *bb : visit_order)
    {
        if (options.eliminate_dead_code)
            mark_used_values(bb->getTerminator());

        for (auto &inst : *bb)
        {
            if (options.eliminate_dead_code)
                mark_used_values(&inst);

            if (auto *load_inst = llvm::dyn_cast<llvm::LoadInst>(&inst))
            {
                if (!analyze_load_instruction(*this, load_inst))
                    return false;
            }
            else if (auto *store_inst = llvm::dyn_cast<llvm::StoreInst>(&inst))
            {
                if (!analyze_store_instruction(*this, store_inst))
                    return false;
            }
            else if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(&inst))
            {
                if (!analyze_phi_instruction(*this, phi_inst))
                    return false;
            }
            else if (auto *atomicrmw_inst = llvm::dyn_cast<llvm::AtomicRMWInst>(&inst))
            {
                if (!analyze_atomicrmw_instruction(*this, atomicrmw_inst))
                    return false;
            }
            else if (auto *cmpxchg_inst = llvm::dyn_cast<llvm::AtomicCmpXchgInst>(&inst))
            {
                if (!analyze_cmpxchg_instruction(*this, cmpxchg_inst))
                    return false;
            }
            else if (auto *alloca_inst = llvm::dyn_cast<llvm::AllocaInst>(&inst))
            {
                if (!analyze_alloca_instruction(*this, alloca_inst))
                    return false;
            }
            else if (auto *getelementptr_inst = llvm::dyn_cast<llvm::GetElementPtrInst>(&inst))
            {
                if (!analyze_getelementptr_instruction(*this, getelementptr_inst))
                    return false;
            }
            else if (auto *extractvalue_inst = llvm::dyn_cast<llvm::ExtractValueInst>(&inst))
            {
                if (!analyze_extractvalue_instruction(*this, extractvalue_inst))
                    return false;
            }
            else if (auto *cmp_inst = llvm::dyn_cast<llvm::CmpInst>(&inst))
            {
                if (!analyze_compare_instruction(*this, cmp_inst))
                    return false;
            }
            else if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *called_function = call_inst->getCalledFunction();
                if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
                {
                    if (!analyze_dxil_instruction_primary_pass(*this, call_inst, bb))
                        return false;
                }
            }
        }

        // Reset vendor tracking for every BB.
        ags.reset();
        nvapi.reset();
    }

    for (auto *bb : visit_order)
    {
        for (auto &inst : *bb)
        {
            if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *called_function = call_inst->getCalledFunction();
                if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
                {
                    if (!analyze_dxil_instruction_secondary_pass(*this, call_inst))
                        return false;
                }
            }
        }

        // Reset vendor tracking for every BB.
        ags.reset();
        nvapi.reset();
    }

    for (auto &alloc : alloca_tracking)
    {
        // Mark required resource aliases before we emit resources.
        // Defer some work until after resource creation.
        const auto *scalar_type = alloc.first->getType()->getPointerElementType()->getArrayElementType();
        if (!analyze_alloca_cbv_forwarding_pre_resource_emit(*this, scalar_type, alloc.second))
            return false;
    }

    ags.reset_analysis();
    nvapi.reset_analysis();

    if (shader_analysis.require_wmma)
        execution_mode_meta.memory_model = spv::MemoryModelVulkan;

    return true;
}

bool Converter::Impl::composite_is_accessed(const llvm::Value *composite) const
{
    return llvm_composite_meta.find(composite) != llvm_composite_meta.end();
}

ConvertedFunction Converter::Impl::convert_entry_point()
{
    ConvertedFunction result = {};
    auto &module = bitcode_parser.get_module();
    entry_point_meta = get_entry_point_meta(module, options.entry_point.empty() ? nullptr : options.entry_point.c_str());
    execution_model = get_execution_model(module, entry_point_meta);
    execution_model_lib_target = get_execution_model_lib_target(module, entry_point_meta);

    if (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute)
    {
        // Might as well go with SPIR-V 1.6. Then we get subgroup size control semantics for "free".
        // When we're willing to do a clean break with Fossilize, all shaders should target SPIR-V 1.6.
        spirv_module.set_override_spirv_version(0x10600);
    }
    else if (execution_model == spv::ExecutionModelFragment && resource_mapping_iface &&
             resource_mapping_iface->has_nontrivial_stage_input_remapping())
    {
        // Force SPIR-V 1.4 for fragment shaders if we might end up requiring mesh shader capabilities.
        // Non-trivial stage input remapping may require the PerPrimitiveEXT decoration.
        spirv_module.set_override_spirv_version(0x10400);
    }

    if (!entry_point_meta)
    {
        if (!options.entry_point.empty())
            LOGE("Could not find entry point \"%s\".\n", options.entry_point.c_str());
        else
            LOGE("Could not find any entry point.\n");
        return result;
    }

    if (!options.shader_source_file.empty())
    {
        auto &builder = spirv_module.get_builder();
        uint32_t sm_major = 0, sm_minor = 0;
        get_shader_model(module, nullptr, &sm_major, &sm_minor);
        builder.setSource(spv::SourceLanguageUnknown, sm_major * 100 + sm_minor);
        builder.setSourceFile(options.shader_source_file);
    }

    result.node_pool = std::make_unique<CFGNodePool>();
    auto &pool = *result.node_pool;

    bool need_bda = options.physical_storage_buffer ||
                    (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute);

    spirv_module.set_descriptor_qa_info(options.descriptor_qa);
    options.instruction_instrumentation.fp16 =
        options.min_precision_prefer_native_16bit || execution_mode_meta.native_16bit_operations;
    spirv_module.set_instruction_instrumentation_info(options.instruction_instrumentation);

    llvm::Function *func = get_entry_point_function(entry_point_meta);
    auto visit_order = build_function_bb_visit_order_legacy(func, pool);
    Vector<llvm::BasicBlock *> patch_visit_order;

    // dxilconv emits somewhat broken code for min16float resource access.
    // Just use FP32 here since that's what we've tested, and it avoids lots of awkward workarounds.
    if (module_is_dxilconv(module))
        options.min_precision_prefer_native_16bit = false;

    if (module_is_dxbc_spirv(module))
    {
        backend.skip_non_uniform_promotion = true;
        // This is new code, might as well exercise it.
        execution_mode_meta.memory_model = spv::MemoryModelVulkan;
    }

    // Need to analyze some execution modes early which affect opcode analysis later.
	if (!analyze_execution_modes_meta())
		return result;

	if (!emit_resources_global_mapping())
		return result;
	if (!analyze_instructions(func))
		return result;

	spirv_module.emit_entry_point(get_execution_model(module, entry_point_meta), "main", need_bda,
	                              execution_mode_meta.memory_model);

	if (!emit_execution_modes())
		return result;

	if (execution_mode_meta.patch_constant_function)
		patch_visit_order = build_function_bb_visit_order_legacy(execution_mode_meta.patch_constant_function, pool);

	Vector<llvm::Function *> callees;
	if (func)
		gather_function_dependencies(func, callees);
	if (execution_mode_meta.patch_constant_function)
		gather_function_dependencies(execution_mode_meta.patch_constant_function, callees);

	// Analyze all leaf functions.
	for (auto *leaf_func : callees)
		if (leaf_func != func && !analyze_instructions(leaf_func))
			return result;

	if (!emit_resources())
		return result;
	if (!emit_stage_input_variables())
		return result;
	if (!emit_stage_output_variables())
		return result;
	if (!emit_patch_variables())
		return result;
	if (!emit_other_variables())
		return result;
	if (!emit_global_variables())
		return result;

	if (options.extended_non_semantic_info)
		for (auto &info : non_semantic_debug_info)
			emit_non_semantic_debug_info(info);

	// Some execution modes depend on other execution modes, so handle that here.
	if (!emit_execution_modes_late())
		return result;

	analyze_instructions_post_execution_modes();

	execution_mode_meta.entry_point_name = get_entry_point_name(entry_point_meta);

	if (!build_callee_functions(pool, callees, result.leaf_functions))
		return result;

	if (execution_model == spv::ExecutionModelTessellationControl)
		result.entry = build_hull_main(visit_order, patch_visit_order, pool, result.leaf_functions);
	else if (execution_mode_meta.declares_rov)
		result.entry = build_rov_main(visit_order, pool, result.leaf_functions);
	else if (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute)
		result.entry = build_node_main(visit_order, pool, result.leaf_functions);
	else
	{
		result.entry.entry = convert_function(visit_order, true);

		if (shader_analysis.needs_auto_group_shared_barriers && options.quirks.group_shared_auto_barrier)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.rewrite_auto_group_shared_barrier();
		}

		if (shader_analysis.require_subgroup_shuffles)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.flatten_subgroup_shuffles();
		}

		if (options.quirks.fixup_loop_header_undef_phis)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.fixup_loop_header_undef_phis();
		}

		result.entry.func = spirv_module.get_entry_function();
	}

#ifdef HAVE_LLVMBC
	if (func && func->get_structured_control_flow())
	{
		// For TESC, the entry is a custom dispatch function.
		result.entry.is_structured = execution_model != spv::ExecutionModelTessellationControl;
		for (auto &leaf : result.leaf_functions)
			leaf.is_structured = true;
	}
#endif

	// Some execution modes depend on code generation, handle that here.
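	// e.g. (hedged): whether a denorm or reconvergence execution mode is actually emitted
	// may only be decidable once we know which opcodes were generated above.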
	emit_execution_modes_post_code_generation();
	return result;
}

Operation *Converter::Impl::allocate(spv::Op op)
{
	return spirv_module.allocate_op(op);
}

Operation *Converter::Impl::allocate(spv::Op op, spv::Id id, spv::Id type_id)
{
	assert(type_id != 0);
	assert(id != 0);
	return spirv_module.allocate_op(op, id, type_id);
}

Operation *Converter::Impl::allocate(spv::Op op, spv::Id type_id)
{
	assert(type_id != 0);
	return spirv_module.allocate_op(op, spirv_module.allocate_id(), type_id);
}

Operation *Converter::Impl::allocate(spv::Op op, const llvm::Value *value)
{
	// Constant expressions cannot have an associated opcode ID to them.
	assert(!llvm::isa<llvm::ConstantExpr>(value));
	return spirv_module.allocate_op(op, get_id_for_value(value), get_type_id(value->getType()));
}

Operation *Converter::Impl::allocate(spv::Op op, const llvm::Value *value, spv::Id type_id)
{
	// Constant expressions cannot have an associated opcode ID to them.
	assert(!llvm::isa<llvm::ConstantExpr>(value));
	assert(type_id != 0);
	return spirv_module.allocate_op(op, get_id_for_value(value), type_id);
}

void Converter::Impl::rewrite_value(const llvm::Value *value, spv::Id id)
{
	auto value_itr = value_map.find(value);
	if (value_itr != value_map.end())
	{
		if (value_itr->second != id)
		{
			// If a PHI node previously accessed the value ID map, it will now refer to a dead
			// ID. Remember to rewrite PHI incoming nodes as necessary.
			phi_incoming_rewrite[value_itr->second] = id;
			value_itr->second = id;
		}
	}
	else
		value_map[value] = id;
}

void Converter::Impl::add(Operation *op, bool is_rov)
{
	assert(current_block);
	if (is_rov)
		current_block->push_back(allocate(spv::OpBeginInvocationInterlockEXT));
	current_block->push_back(op);
	if (is_rov)
		current_block->push_back(allocate(spv::OpEndInvocationInterlockEXT));
}

void Converter::Impl::register_externally_visible_write(const llvm::Value *value)
{
	if (!options.instruction_instrumentation.enabled ||
	    options.instruction_instrumentation.type != InstructionInstrumentationType::ExternallyVisibleWriteNanInf)
		return;

	// Ignore undefs and intentional nan/inf writes.
	// (Undef values and constant NaN/Inf literals are both llvm::Constant.)
	if (llvm::isa<llvm::Constant>(value))
		return;

	// Punch through any bitcasts.
	// Sometimes, shaders want to store floats as uints for practical reasons.
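	// Hedged illustration (hypothetical HLSL): rwbuf.Store(offset, asuint(f)) arrives here
	// as a BitCast from float to i32; peeling the casts lets the instrumentation observe
	// the float value that is actually being made externally visible.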
	while (llvm::isa<llvm::CastInst>(value))
	{
		auto *cast = llvm::cast<llvm::CastInst>(value);
		if (cast->getOpcode() == llvm::CastInst::CastOps::BitCast)
			value = cast->getOperand(0);
		else
			break;
	}

	switch (value->getType()->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
	case llvm::Type::TypeID::FloatTyID:
	case llvm::Type::TypeID::DoubleTyID:
	{
		auto *op = allocate(spv::PseudoOpInstrumentExternallyVisibleStore);
		op->add_id(get_id_for_value(value));
		add(op);
		break;
	}

	default:
		break;
	}
}

spv::Builder &Converter::Impl::builder()
{
	return spirv_module.get_builder();
}

spv::Id Converter::Impl::create_variable(spv::StorageClass storage, spv::Id type_id, const char *name)
{
	return spirv_module.create_variable(storage, type_id, name);
}

spv::Id Converter::Impl::create_variable_with_initializer(spv::StorageClass storage, spv::Id type_id,
                                                          spv::Id initializer, const char *name)
{
	return spirv_module.create_variable_with_initializer(storage, type_id, initializer, name);
}

spv::StorageClass Converter::Impl::get_effective_storage_class(const llvm::Value *value,
                                                               spv::StorageClass fallback) const
{
	auto itr = handle_to_storage_class.find(value);
	if (itr != handle_to_storage_class.end())
		return itr->second;
	else
		return fallback;
}

bool Converter::Impl::get_needs_temp_storage_copy(const llvm::Value *value) const
{
	// We always need a temp storage copy if this isn't
	// directly the result of an alloca instruction.
	if (!llvm::dyn_cast<llvm::AllocaInst>(value))
		return true;

	// We'll also need a temp storage copy if this
	// alloca is directly referenced by
	// a TraceRay AND a CallShader.
	return needs_temp_storage_copy.count(value) != 0;
}

spv::Id Converter::Impl::get_temp_payload(spv::Id type, spv::StorageClass storage)
{
	for (const auto &temp_payload : temp_payloads)
	{
		if (temp_payload.type == type && temp_payload.storage == storage)
			return temp_payload.id;
	}

	spv::Id var_id = create_variable(storage, type);
	temp_payloads.push_back(TempPayloadEntry{ type, storage, var_id });
	return var_id;
}

DXIL::ComponentType Converter::Impl::get_effective_typed_resource_type(DXIL::ComponentType type)
{
	// Expand/contract on load/store.
	// DXIL can emit half textures for example,
	// but we need to contract or expand instead.
	return convert_16bit_component_to_32bit(type);
}

DXIL::ComponentType Converter::Impl::get_effective_input_output_type(DXIL::ComponentType type)
{
	bool supports_narrow_arith_type = type != DXIL::ComponentType::F16 || support_native_fp16_operations();
	if (options.storage_16bit_input_output && supports_narrow_arith_type)
	{
		if (component_type_is_16bit(type))
			builder().addCapability(spv::CapabilityStorageInputOutput16);
	}
	else
	{
		// Expand/contract on load/store.
		// The only reasonable way this can break is if application relies on
		// lower precision in interpolation, but I don't think you can rely on that
		// kind of implementation detail ...
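		// Hedged example: a min16float varying declared as F16 widens to F32 here and is
		// consumed as F32 by the next stage; only interpolation precision could observably differ.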
		type = convert_16bit_component_to_32bit(type);
	}
	return type;
}

spv::Id Converter::Impl::get_effective_input_output_type_id(DXIL::ComponentType type)
{
	return get_type_id(get_effective_input_output_type(type), 1, 1);
}

bool Converter::Impl::type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const
{
	if (!options.arithmetic_relaxed_precision)
		return false;

	if (type->getTypeID() == llvm::Type::TypeID::ArrayTyID)
		type = llvm::cast<llvm::ArrayType>(type)->getArrayElementType();
	if (type->getTypeID() == llvm::Type::TypeID::VectorTyID)
		type = llvm::cast<llvm::VectorType>(type)->getElementType();

	return (!execution_mode_meta.native_16bit_operations && !options.min_precision_prefer_native_16bit) &&
	       (type->getTypeID() == llvm::Type::TypeID::HalfTyID ||
	        (type->getTypeID() == llvm::Type::TypeID::IntegerTyID && type->getIntegerBitWidth() == 16 &&
	         known_integer_sign));
}

void Converter::Impl::decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign)
{
	// Ignore RelaxedPrecision for integers since they are untyped in LLVM for the most part.
	// For texture loading operations and similar, we load in the appropriate sign, so it's safe to use RelaxedPrecision,
	// since RelaxedPrecision may sign-extend based on the OpTypeInt's signage.
	// DXIL is kinda broken in this regard since min16int and min16uint lower to the same i16 type ... :(
	if (type_can_relax_precision(type, known_integer_sign))
		builder().addDecoration(id, spv::DecorationRelaxedPrecision);
}

void Converter::Impl::set_option(const OptionBase &cap)
{
	switch (cap.type)
	{
	case Option::ShaderDemoteToHelper:
		options.shader_demote = static_cast<const OptionShaderDemoteToHelper &>(cap).supported;
		break;

	case Option::DualSourceBlending:
		options.dual_source_blending = static_cast<const OptionDualSourceBlending &>(cap).enabled;
		break;

	case Option::OutputSwizzle:
	{
		auto &swiz = static_cast<const OptionOutputSwizzle &>(cap);
		options.output_swizzles.clear();
		options.output_swizzles.insert(options.output_swizzles.end(), swiz.swizzles, swiz.swizzles + swiz.swizzle_count);
		break;
	}

	case Option::RasterizerSampleCount:
	{
		auto &count = static_cast<const OptionRasterizerSampleCount &>(cap);
		options.rasterizer_sample_count = count.count;
		options.rasterizer_sample_count_spec_constant = count.spec_constant;
		break;
	}

	case Option::RootConstantInlineUniformBlock:
	{
		auto &ubo = static_cast<const OptionRootConstantInlineUniformBlock &>(cap);
		options.inline_ubo_descriptor_set = ubo.desc_set;
		options.inline_ubo_descriptor_binding = ubo.binding;
		options.inline_ubo_enable = ubo.enable;
		break;
	}

	case Option::BindlessCBVSSBOEmulation:
	{
		auto &bindless = static_cast<const OptionBindlessCBVSSBOEmulation &>(cap);
		options.bindless_cbv_ssbo_emulation = bindless.enable;
		break;
	}

	case Option::PhysicalStorageBuffer:
	{
		auto &psb = static_cast<const OptionPhysicalStorageBuffer &>(cap);
		options.physical_storage_buffer = psb.enable;
		break;
	}

	case Option::SBTDescriptorSizeLog2:
	{
		auto &sbt = static_cast<const OptionSBTDescriptorSizeLog2 &>(cap);
		options.sbt_descriptor_size_srv_uav_cbv_log2 = sbt.size_log2_srv_uav_cbv;
		options.sbt_descriptor_size_sampler_log2 = sbt.size_log2_sampler;
		break;
	}

	case Option::SSBOAlignment:
	{
		auto &align = static_cast<const OptionSSBOAlignment &>(cap);
		options.ssbo_alignment = align.alignment;
		break;
	}

	case Option::TypedUAVReadWithoutFormat:
	{
		auto &uav = static_cast<const OptionTypedUAVReadWithoutFormat &>(cap);
		options.typed_uav_read_without_format = uav.supported;
		break;
	}

	case Option::ShaderSourceFile:
	{
		auto &file = static_cast<const OptionShaderSourceFile &>(cap);
		if (!file.name.empty())
			options.shader_source_file = file.name;
		else
			options.shader_source_file.clear();
		break;
	}

	case Option::BindlessTypedBufferOffsets:
	{
		auto &off = static_cast<const OptionBindlessTypedBufferOffsets &>(cap);
		options.bindless_typed_buffer_offsets = off.enable;
		break;
	}

	case Option::BindlessOffsetBufferLayout:
	{
		auto &off = static_cast<const OptionBindlessOffsetBufferLayout &>(cap);
		options.offset_buffer_layout = { off.untyped_offset, off.typed_offset, off.stride };
		break;
	}

	case Option::StorageInputOutput16:
	{
		auto &storage = static_cast<const OptionStorageInputOutput16 &>(cap);
		options.storage_16bit_input_output = storage.supported;
		break;
	}

	case Option::DescriptorQA:
	{
		auto &qa = static_cast<const OptionDescriptorQA &>(cap);
		options.descriptor_qa_enabled = qa.enabled;
		options.descriptor_qa.version = qa.version;
		options.descriptor_qa.shader_hash = qa.shader_hash;
		options.descriptor_qa.global_desc_set = qa.global_desc_set;
		options.descriptor_qa.global_binding = qa.global_binding;
		options.descriptor_qa.heap_desc_set = qa.heap_desc_set;
		options.descriptor_qa.heap_binding = qa.heap_binding;
		break;
	}

	case Option::MinPrecisionNative16Bit:
	{
		auto &minprec = static_cast<const OptionMinPrecisionNative16Bit &>(cap);
		options.min_precision_prefer_native_16bit = minprec.enabled;
		break;
	}

	case Option::ShaderI8Dot:
		options.shader_i8_dot_enabled = static_cast<const OptionShaderI8Dot &>(cap).supported;
		break;

	case Option::ShaderRayTracingPrimitiveCulling:
		options.ray_tracing_primitive_culling_enabled =
		    static_cast<const OptionShaderRayTracingPrimitiveCulling &>(cap).supported;
		break;

	case Option::InvariantPosition:
		options.invariant_position = static_cast<const OptionInvariantPosition &>(cap).enabled;
		break;

	case Option::ScalarBlockLayout:
		options.scalar_block_layout = static_cast<const OptionScalarBlockLayout &>(cap).supported;
		options.supports_per_component_robustness =
		    static_cast<const OptionScalarBlockLayout &>(cap).supports_per_component_robustness;
		break;

	case Option::BarycentricKHR:
		options.khr_barycentrics_enabled = static_cast<const OptionBarycentricKHR &>(cap).supported;
		break;

	case Option::RobustPhysicalCBVLoad:
		// Obsolete option, use normal quirks instead.
		options.quirks.robust_physical_cbv = static_cast<const OptionRobustPhysicalCBVLoad &>(cap).enabled;
		break;

	case Option::ArithmeticRelaxedPrecision:
		options.arithmetic_relaxed_precision = static_cast<const OptionArithmeticRelaxedPrecision &>(cap).enabled;
		break;

	case Option::PhysicalAddressDescriptorIndexing:
		options.physical_address_descriptor_stride =
		    static_cast<const OptionPhysicalAddressDescriptorIndexing &>(cap).element_stride;
		options.physical_address_descriptor_offset =
		    static_cast<const OptionPhysicalAddressDescriptorIndexing &>(cap).element_offset;
		break;

	case Option::ForceSubgroupSize:
		options.force_subgroup_size = static_cast<const OptionForceSubgroupSize &>(cap).forced_value;
		options.force_wave_size_enable = static_cast<const OptionForceSubgroupSize &>(cap).wave_size_enable;
		break;

	case Option::DenormPreserveSupport:
		options.supports_float16_denorm_preserve =
		    static_cast<const OptionDenormPreserveSupport &>(cap).support_float16_denorm_preserve;
		options.supports_float64_denorm_preserve =
		    static_cast<const OptionDenormPreserveSupport &>(cap).support_float64_denorm_preserve;
		break;

	case Option::StrictHelperLaneWaveOps:
		options.strict_helper_lane_waveops = static_cast<const OptionStrictHelperLaneWaveOps &>(cap).enable;
		break;

	case Option::SubgroupPartitionedNV:
		options.nv_subgroup_partition_enabled = static_cast<const OptionSubgroupPartitionedNV &>(cap).supported;
		break;

	case Option::DeadCodeEliminate:
		options.eliminate_dead_code = static_cast<const OptionDeadCodeEliminate &>(cap).enabled;
		break;

	case Option::PreciseControl:
		options.propagate_precise = static_cast<const OptionPreciseControl &>(cap).propagate_precise;
		options.force_precise = static_cast<const OptionPreciseControl &>(cap).force_precise;
		break;

	case Option::SampleGradOptimizationControl:
		options.grad_opt.enabled = static_cast<const OptionSampleGradOptimizationControl &>(cap).enabled;
		options.grad_opt.assume_uniform_scale =
		    static_cast<const OptionSampleGradOptimizationControl &>(cap).assume_uniform_scale;
		break;

	case Option::OpacityMicromap:
		options.opacity_micromap_enabled = static_cast<const OptionOpacityMicromap &>(cap).enabled;
		break;

	case Option::BranchControl:
	{
		auto &c = static_cast<const OptionBranchControl &>(cap);
		options.branch_control.use_shader_metadata = c.use_shader_metadata;
		options.branch_control.force_branch = c.force_branch;
		options.branch_control.force_unroll = c.force_unroll;
		options.branch_control.force_loop = c.force_loop;
		options.branch_control.force_flatten = c.force_flatten;
		break;
	}

	case Option::SubgroupProperties:
	{
		auto &c = static_cast<const OptionSubgroupProperties &>(cap);
		options.subgroup_size.implementation_minimum = c.minimum_size;
		options.subgroup_size.implementation_maximum = c.maximum_size;
		break;
	}

	case Option::DescriptorHeapRobustness:
	{
		auto &c = static_cast<const OptionDescriptorHeapRobustness &>(cap);
		options.descriptor_heap_robustness = c.enabled;
		break;
	}

	case Option::ComputeShaderDerivativesNV:
	{
		auto &c = static_cast<const OptionComputeShaderDerivativesNV &>(cap);
		options.compute_shader_derivatives = c.supported;
		break;
	}

	case Option::QuadControlReconvergence:
	{
		auto &c = static_cast<const OptionQuadControlReconvergence &>(cap);
		options.supports_quad_control = c.supports_quad_control;
		options.supports_maximal_reconvergence = c.supports_maximal_reconvergence;
		options.force_maximal_reconvergence = c.force_maximal_reconvergence;
		break;
	}

	case Option::RawAccessChainsNV:
	{
		auto &c = static_cast<const OptionRawAccessChainsNV &>(cap);
		options.nv_raw_access_chains = c.supported;
		break;
	}

	case Option::DriverVersion:
	{
		auto &c = static_cast<const OptionDriverVersion &>(cap);
		options.driver_id = c.driver_id;
		options.driver_version = c.driver_version;
		break;
	}

	case Option::ComputeShaderDerivatives:
	{
		auto &c = static_cast<const OptionComputeShaderDerivatives &>(cap);
		options.compute_shader_derivatives = c.supports_nv || c.supports_khr;
		options.compute_shader_derivatives_khr = c.supports_khr;
		break;
	}

	case Option::InstructionInstrumentation:
	{
		auto &qa = static_cast<const OptionInstructionInstrumentation &>(cap);
		options.instruction_instrumentation.enabled = qa.enabled;
		options.instruction_instrumentation.version = qa.version;
		options.instruction_instrumentation.shader_hash = qa.shader_hash;
		options.instruction_instrumentation.fp16 = false;
		options.instruction_instrumentation.fp32 = true;
		options.instruction_instrumentation.fp64 = true;
		options.instruction_instrumentation.type = qa.type;
		options.instruction_instrumentation.control_desc_set = qa.control_desc_set;
		options.instruction_instrumentation.control_binding = qa.control_binding;
		options.instruction_instrumentation.payload_desc_set = qa.payload_desc_set;
		options.instruction_instrumentation.payload_binding = qa.payload_binding;
		break;
	}

	case Option::ShaderQuirk:
	{
		auto &quirk = static_cast<const OptionShaderQuirk &>(cap);
		switch (quirk.quirk)
		{
		case ShaderQuirk::ForceDeviceMemoryBarriersThreadGroupCoherence:
			// Dragon Age: Veilguard workaround.
			options.quirks.force_device_memory_barriers_thread_group_coherence = true;
			break;

		case ShaderQuirk::AssumeBrokenSub8x8CubeMips:
			// The First Descendant workaround. Importance sampling pass is broken since only mips down to 8x8
			// are populated with valid data.
			options.quirks.assume_broken_sub_8x8_cube_mips = true;
			break;

		case ShaderQuirk::RobustPhysicalCBVForwarding:
			// Gray Zone Warfare workaround. Does CBV forwarding with out of bounds access on the local array <_<.
			// Can trip page faults.
			options.quirks.robust_physical_cbv_forwarding = true;
			break;

		case ShaderQuirk::MeshOutputRobustness:
			options.quirks.mesh_outputs_bounds_check = true;
			break;

		case ShaderQuirk::AggressiveNonUniform:
			// Starfield workaround. Some shaders should have used nonuniform,
			// but the general pattern to detect it is quite complicated.
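			// Hypothetical shape of the pattern (not taken from this module):
			//   uint idx = ComputePerLaneIndex();
			//   Texture2D t = ResourceDescriptorHeap[idx]; // missing NonUniformResourceIndex(idx)
			// The quirk decorates such descriptor accesses as NonUniform wholesale instead of
			// trying to prove divergence case by case.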
			options.quirks.aggressive_nonuniform = true;
			break;

		case ShaderQuirk::RobustPhysicalCBV:
			options.quirks.robust_physical_cbv = true;
			break;

		case ShaderQuirk::PromoteGroupToDeviceMemoryBarrier:
			options.quirks.promote_group_to_device_memory_barrier = true;
			break;

		case ShaderQuirk::GroupSharedAutoBarrier:
			options.quirks.group_shared_auto_barrier = true;
			break;

		case ShaderQuirk::FixupLoopHeaderUndefPhis:
			options.quirks.fixup_loop_header_undef_phis = true;
			break;

		case ShaderQuirk::FixupRsqrtInfNan:
			options.quirks.fixup_rsqrt = true;
			break;

		case ShaderQuirk::IgnorePrimitiveShadingRate:
			options.quirks.ignore_primitive_shading_rate = true;
			break;

		case ShaderQuirk::RobustComputeQuadBroadcast:
			options.quirks.robust_compute_quad_broadcast = true;
			break;

		case ShaderQuirk::PreciseFMA:
			options.quirks.precise_fma = true;
			break;

		default:
			break;
		}
		break;
	}

	case Option::ExtendedRobustness:
	{
		auto &robust = static_cast<const OptionExtendedRobustness &>(cap);
		options.extended_robustness.alloca = robust.robust_alloca;
		options.extended_robustness.constant_lut = robust.robust_constant_lut;
		options.extended_robustness.group_shared = robust.robust_group_shared;
		break;
	}

	case Option::MaxTessFactor:
	{
		auto &tess_factor = static_cast<const OptionMaxTessFactor &>(cap);
		options.max_tess_factor = tess_factor.max_tess_factor;
		break;
	}

	case Option::VulkanMemoryModel:
	{
		auto &vmm = static_cast<const OptionVulkanMemoryModel &>(cap);
		execution_mode_meta.memory_model = vmm.enabled ? spv::MemoryModelVulkan : spv::MemoryModelGLSL450;
		break;
	}

	case Option::Float8Support:
	{
		auto &float8 = static_cast<const OptionFloat8Support &>(cap);
		options.wmma_fp8 = float8.wmma_fp8;
		options.nv_cooperative_matrix2_conversions = float8.nv_cooperative_matrix2_conversions;
		break;
	}

	case Option::NvAPI:
	{
		auto &nv = static_cast<const OptionNvAPI &>(cap);
		options.nvapi.enabled = nv.enabled;
		options.nvapi.register_index = nv.register_index;
		options.nvapi.register_space = nv.register_space;
		break;
	}

	case Option::ExtendedNonSemantic:
	{
		auto &sem = static_cast<const OptionExtendedNonSemantic &>(cap);
		options.extended_non_semantic_info = sem.enabled;
		break;
	}

	case Option::ViewInstancing:
	{
		auto &inst = static_cast<const OptionViewInstancing &>(cap);
		options.multiview.enable = inst.enabled;
		options.multiview.last_pre_rasterization_stage = inst.last_pre_rasterization_stage;
		options.multiview.view_index_to_view_instance_spec_id = inst.view_index_to_view_instance_spec_id;
		options.multiview.view_instance_to_viewport_spec_id = inst.view_instance_to_viewport_spec_id;
		break;
	}

	case Option::MixedDotProduct:
	{
		auto &dot = static_cast<const OptionMixedDotProduct &>(cap);
		options.mixed_dot_product_fp16_fp16_fp32 = dot.fp16_fp16_fp32;
		break;
	}

	case Option::ComputeShaderDerivativesQuad:
	{
		auto &c = static_cast<const OptionComputeShaderDerivativesQuad &>(cap);
		options.compute_shader_derivatives_quad = c.supports_quad;
		break;
	}

	default:
		break;
	}
}

void Converter::Impl::suggest_maximum_wave_size(unsigned wave_size)
{
	if ((execution_mode_meta.heuristic_max_wave_size == 0 || execution_mode_meta.heuristic_max_wave_size > wave_size) &&
	    options.force_subgroup_size == 0)
	{
		execution_mode_meta.heuristic_max_wave_size = wave_size;
	}
}

void Converter::Impl::suggest_minimum_wave_size(unsigned wave_size)
{
	if ((execution_mode_meta.heuristic_min_wave_size == 0 || execution_mode_meta.heuristic_min_wave_size < wave_size) &&
	    options.force_subgroup_size == 0)
	{
		execution_mode_meta.heuristic_min_wave_size = wave_size;
	}
}

void Converter::set_resource_remapping_interface(ResourceRemappingInterface *iface)
{
	impl->resource_mapping_iface = iface;
}

void Converter::set_meta_descriptor(MetaDescriptor desc, MetaDescriptorKind kind, uint32_t desc_set, uint32_t binding)
{
	if (int(desc) >= int(MetaDescriptor::Count))
		return;
	impl->options.meta_descriptor_mappings[int(desc)] = { kind, desc_set, binding };
}

ShaderStage Converter::get_shader_stage(const LLVMBCParser &bitcode_parser, const char *entry)
{
	auto &module = bitcode_parser.get_module();
	return Impl::get_remapping_stage(get_execution_model(module, get_entry_point_meta(module, entry)));
}

void Converter::scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser)
{
	Impl::scan_resources(iface, bitcode_parser);
}

void Converter::add_option(const OptionBase &cap)
{
	impl->set_option(cap);
}

bool Converter::recognizes_option(Option cap)
{
	return unsigned(cap) < unsigned(Option::Count);
}

void Converter::set_entry_point(const char *entry)
{
	impl->options.entry_point = entry;
}

void Converter::add_root_parameter_mapping(uint32_t root_parameter_index, uint32_t offset)
{
	impl->root_parameter_mappings.push_back({ root_parameter_index, offset });
}

uint32_t Converter::pack_desc_set_binding_to_virtual_offset(uint32_t desc_set, uint32_t binding)
{
	return 0x80000000u | (desc_set << 24) | binding;
}

void Converter::add_non_semantic_debug_info(const NonSemanticDebugInfo &info)
{
	impl->non_semantic_debug_info.push_back(info);
}

const String &Converter::get_compiled_entry_point() const
{
	return impl->execution_mode_meta.entry_point_name;
}

const GlobalConfiguration &GlobalConfiguration::get()
{
	static GlobalConfiguration config;
	return config;
}

GlobalConfiguration::GlobalConfiguration()
{
	const char *env = getenv("DXIL_SPIRV_CONFIG");
	if (env)
	{
		if (strcmp(env, "wmma_rdna3_workaround") == 0)
			wmma_rdna3_workaround = true;
		else if (strcmp(env, "wmma_conv_hack") == 0)
			wmma_conv_hack = true;
	}
}
} // namespace dxil_spv



================================================
FILE: dxil_converter.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "cfg_structurizer.hpp"
#include "dxil_parser.hpp"
#include "llvm_bitcode_parser.hpp"
#include "node_pool.hpp"
#include "spirv_module.hpp"
// The angle-bracket include was lost in extraction; <memory> is reconstructed from the
// std::unique_ptr usage below.
#include <memory>

namespace spv
{
class Function;
}

namespace dxil_spv
{
struct ConvertedFunction
{
	struct Function
	{
		CFGNode *entry;
		spv::Function *func;
		bool is_structured;
	};
	Function entry = {};
	Vector<Function> leaf_functions;
	std::unique_ptr<CFGNodePool> node_pool;
};

enum class ShaderStage : unsigned
{
	Unknown = 0,
	Vertex = 1,
	Hull = 2,
	Domain = 3,
	Geometry = 4,
	Pixel = 5,
	Compute = 6,
	Intersection = 7,
	ClosestHit = 8,
	Miss = 9,
	AnyHit = 10,
	RayGeneration = 11,
	Callable = 12,
	Amplification = 13,
	Mesh = 14,
};

struct D3DBinding
{
	ShaderStage stage;
	DXIL::ResourceKind kind;

	// The index in which the resource was declared in the module.
	// Range is [0, N), where N is number of resources.
	unsigned resource_index;

	// : register(N, spaceM)
	// If register(UINT32_MAX, UINT32_MAX) with range_size = UINT32_MAX is used, this is treated
	// as a binding of the global descriptor heap.
	unsigned register_space;
	unsigned register_index;

	// -1 -> unsized, 1 means non-arrayed resource.
	unsigned range_size;

	// For raw buffers, this is equal to 16, for structured buffers this is equal to the stride of the elements.
	// Otherwise, 0.
	unsigned alignment;
};

enum class VulkanDescriptorType : unsigned
{
	Identity = 0,
	SSBO = 1,
	TexelBuffer = 2,
	BufferDeviceAddress = 3,
	UBO = 4,
	InputAttachment = 5
};

struct VulkanBinding
{
	unsigned descriptor_set;
	unsigned binding;

	// For bindless, refers to the Nth root constant.
	// For buffer device address, refers to the Nth root descriptor.
	union
	{
		unsigned root_constant_index;
		unsigned input_attachment_index;
	};

	struct
	{
		unsigned heap_root_offset;
		// If true, the resource is accessed directly from a descriptor heap in a way which emulates D3D12 closely.
		// layout(set = descriptor_set, binding = binding) uniform Type HEAP[];
		// HEAP[shader_index + heap_offset + registers.u32s[push_constant_member]].
		bool use_heap;
	} bindless;

	VulkanDescriptorType descriptor_type;
};

struct D3DUAVBinding
{
	D3DBinding binding;
	bool counter;
};

struct VulkanSRVBinding
{
	VulkanBinding buffer_binding;
	VulkanBinding offset_binding;
};

struct VulkanUAVBinding
{
	VulkanBinding buffer_binding;
	VulkanBinding counter_binding;
	VulkanBinding offset_binding;
};

struct VulkanPushConstantBinding
{
	unsigned offset_in_words;
};

struct VulkanCBVBinding
{
	union
	{
		VulkanBinding buffer;
		VulkanPushConstantBinding push;
	};

	// Select if the CBV should fetch constants from push constants, or regular UBO.
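	// Hedged remapper-side sketch (names hypothetical): route root-constant CBVs to push
	// constants, everything else to a plain UBO binding.
	//   VulkanCBVBinding vk = {};
	//   if (is_root_constant_cbv) { vk.push_constant = true; vk.push.offset_in_words = root_offset; }
	//   else { vk.push_constant = false; vk.buffer.descriptor_set = set; vk.buffer.binding = binding;
	//          vk.buffer.descriptor_type = VulkanDescriptorType::UBO; }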
bool push_constant; }; struct D3DStageIO { const char *semantic; unsigned semantic_index; unsigned start_row; unsigned rows; }; enum VulkanStageIoFlagBits { STAGE_IO_NONE = 0u, STAGE_IO_PER_PRIMITIVE = 0x1u, }; using VulkanStageIoFlags = unsigned; struct VulkanStageIO { unsigned location; unsigned component; VulkanStageIoFlags flags; }; struct D3DStreamOutput { const char *semantic; unsigned semantic_index; }; struct VulkanStreamOutput { unsigned offset; unsigned stride; unsigned buffer_index; bool enable; }; class ResourceRemappingInterface { public: virtual ~ResourceRemappingInterface() = default; virtual bool remap_srv(const D3DBinding &d3d_binding, VulkanSRVBinding &vulkan_binding) = 0; virtual bool remap_sampler(const D3DBinding &d3d_binding, VulkanBinding &vulkan_binding) = 0; virtual bool remap_uav(const D3DUAVBinding &d3d_binding, VulkanUAVBinding &vulkan_binding) = 0; virtual bool remap_cbv(const D3DBinding &d3d_binding, VulkanCBVBinding &vulkan_binding) = 0; virtual bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vulkan_location) = 0; virtual bool remap_stream_output(const D3DStreamOutput &d3d_output, VulkanStreamOutput &vulkan_output) = 0; virtual bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) = 0; virtual bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) = 0; virtual unsigned get_root_constant_word_count() = 0; virtual unsigned get_root_descriptor_count() = 0; virtual bool has_nontrivial_stage_input_remapping() = 0; }; enum class Option : uint32_t { Invalid = 0, ShaderDemoteToHelper = 1, DualSourceBlending = 2, OutputSwizzle = 3, RasterizerSampleCount = 4, RootConstantInlineUniformBlock = 5, BindlessCBVSSBOEmulation = 6, PhysicalStorageBuffer = 7, SBTDescriptorSizeLog2 = 8, SSBOAlignment = 9, TypedUAVReadWithoutFormat = 10, ShaderSourceFile = 11, BindlessTypedBufferOffsets = 12, BindlessOffsetBufferLayout = 13, StorageInputOutput16 = 14, DescriptorQA = 15, MinPrecisionNative16Bit = 16, ShaderI8Dot = 17, ShaderRayTracingPrimitiveCulling = 18, InvariantPosition = 19, ScalarBlockLayout = 20, BarycentricKHR = 21, RobustPhysicalCBVLoad = 22, ArithmeticRelaxedPrecision = 23, PhysicalAddressDescriptorIndexing = 24, ForceSubgroupSize = 25, DenormPreserveSupport = 26, StrictHelperLaneWaveOps = 27, SubgroupPartitionedNV = 28, DeadCodeEliminate = 29, PreciseControl = 30, SampleGradOptimizationControl = 31, OpacityMicromap = 32, BranchControl = 33, SubgroupProperties = 34, DescriptorHeapRobustness = 35, ComputeShaderDerivativesNV = 36, QuadControlReconvergence = 37, RawAccessChainsNV = 38, DriverVersion = 39, ComputeShaderDerivatives = 40, InstructionInstrumentation = 41, ShaderQuirk = 42, ExtendedRobustness = 43, MaxTessFactor = 44, VulkanMemoryModel = 45, Float8Support = 46, NvAPI = 47, ExtendedNonSemantic = 48, ViewInstancing = 49, MixedDotProduct = 50, ComputeShaderDerivativesQuad = 51, Count }; enum class ResourceClass : uint32_t { SRV = 0, UAV = 1, CBV = 2, Sampler = 3 }; struct OptionBase { explicit OptionBase(Option cap) : type(cap) { } Option type; DXIL_SPV_OVERRIDE_NEW_DELETE }; struct OptionShaderDemoteToHelper : OptionBase { OptionShaderDemoteToHelper() : OptionBase(Option::ShaderDemoteToHelper) { } bool supported = false; }; struct OptionDualSourceBlending : OptionBase { OptionDualSourceBlending() : OptionBase(Option::DualSourceBlending) { } bool enabled = false; }; struct OptionOutputSwizzle : OptionBase { OptionOutputSwizzle() : OptionBase(Option::OutputSwizzle) { } const unsigned *swizzles = 
nullptr; unsigned swizzle_count = 0; }; struct OptionRasterizerSampleCount : OptionBase { OptionRasterizerSampleCount() : OptionBase(Option::RasterizerSampleCount) { } unsigned count = 0; bool spec_constant = false; }; struct OptionRootConstantInlineUniformBlock : OptionBase { OptionRootConstantInlineUniformBlock() : OptionBase(Option::RootConstantInlineUniformBlock) { } unsigned desc_set = 0; unsigned binding = 0; bool enable = false; }; struct OptionBindlessCBVSSBOEmulation : OptionBase { OptionBindlessCBVSSBOEmulation() : OptionBase(Option::BindlessCBVSSBOEmulation) { } bool enable = false; }; struct OptionPhysicalStorageBuffer : OptionBase { OptionPhysicalStorageBuffer() : OptionBase(Option::PhysicalStorageBuffer) { } bool enable = false; }; struct OptionSBTDescriptorSizeLog2 : OptionBase { OptionSBTDescriptorSizeLog2() : OptionBase(Option::SBTDescriptorSizeLog2) { } unsigned size_log2_srv_uav_cbv = 0; unsigned size_log2_sampler = 0; }; struct OptionSSBOAlignment : OptionBase { OptionSSBOAlignment() : OptionBase(Option::SSBOAlignment) { } unsigned alignment = 1; }; struct OptionTypedUAVReadWithoutFormat : OptionBase { OptionTypedUAVReadWithoutFormat() : OptionBase(Option::TypedUAVReadWithoutFormat) { } bool supported = false; }; struct OptionShaderSourceFile : OptionBase { OptionShaderSourceFile() : OptionBase(Option::ShaderSourceFile) { } String name; }; struct OptionBindlessTypedBufferOffsets : OptionBase { OptionBindlessTypedBufferOffsets() : OptionBase(Option::BindlessTypedBufferOffsets) { } bool enable = false; }; struct OptionBindlessOffsetBufferLayout : OptionBase { OptionBindlessOffsetBufferLayout() : OptionBase(Option::BindlessOffsetBufferLayout) { } unsigned untyped_offset = 0; unsigned typed_offset = 0; unsigned stride = 1; }; struct OptionStorageInputOutput16 : OptionBase { OptionStorageInputOutput16() : OptionBase(Option::StorageInputOutput16) { } bool supported = true; }; struct OptionDescriptorQA : OptionBase { OptionDescriptorQA() : OptionBase(Option::DescriptorQA) { } enum { DefaultVersion = 1 }; bool enabled = false; uint32_t version = DefaultVersion; uint32_t global_desc_set = 0; uint32_t global_binding = 0; uint32_t heap_desc_set = 0; uint32_t heap_binding = 0; uint64_t shader_hash = 0; }; struct OptionMinPrecisionNative16Bit : OptionBase { OptionMinPrecisionNative16Bit() : OptionBase(Option::MinPrecisionNative16Bit) { } bool enabled = false; }; struct OptionShaderI8Dot : OptionBase { OptionShaderI8Dot() : OptionBase(Option::ShaderI8Dot) { } bool supported = false; }; struct OptionShaderRayTracingPrimitiveCulling : OptionBase { OptionShaderRayTracingPrimitiveCulling() : OptionBase(Option::ShaderRayTracingPrimitiveCulling) { } bool supported = false; }; struct OptionInvariantPosition : OptionBase { OptionInvariantPosition() : OptionBase(Option::InvariantPosition) { } bool enabled = false; }; struct OptionScalarBlockLayout : OptionBase { OptionScalarBlockLayout() : OptionBase(Option::ScalarBlockLayout) { } bool supported = false; bool supports_per_component_robustness = false; }; struct OptionBarycentricKHR : OptionBase { OptionBarycentricKHR() : OptionBase(Option::BarycentricKHR) { } bool supported = false; }; struct OptionRobustPhysicalCBVLoad : OptionBase { OptionRobustPhysicalCBVLoad() : OptionBase(Option::RobustPhysicalCBVLoad) { } bool enabled = false; }; struct OptionArithmeticRelaxedPrecision : OptionBase { OptionArithmeticRelaxedPrecision() : OptionBase(Option::ArithmeticRelaxedPrecision) { } bool enabled = false; }; struct 
OptionPhysicalAddressDescriptorIndexing : OptionBase
{
	OptionPhysicalAddressDescriptorIndexing() : OptionBase(Option::PhysicalAddressDescriptorIndexing) { }
	// In units of uint64_t addresses.
	// Used for scenarios where a descriptor buffer is bound directly as an SSBO
	// and raw VAs might no longer be tightly packed in memory.
	unsigned element_stride = 1;
	unsigned element_offset = 0;
};

struct OptionForceSubgroupSize : OptionBase
{
	OptionForceSubgroupSize() : OptionBase(Option::ForceSubgroupSize) { }
	// If not 0, forces WaveGetLaneCount() to return a fixed value.
	// Can be used to force a shader to avoid buggy code paths.
	unsigned forced_value = 0;
	// If true and forced_value is not 0,
	// pretends that the shader exposes SM 6.6 WaveSize equal to forced_value.
	// Intended use case:
	// - false: Workaround and avoid bad subgroup code paths by setting forced_value to something meaningless.
	// - true: Performance tweaks. Force e.g. wave32 vs wave64 on RDNA.
	bool wave_size_enable = false;
};

struct OptionDenormPreserveSupport : OptionBase
{
	OptionDenormPreserveSupport() : OptionBase(Option::DenormPreserveSupport) { }
	// Should always be set to true if supported.
	// If not supported, rely on implementation to default to the right thing.
	bool support_float16_denorm_preserve = false;
	bool support_float64_denorm_preserve = false;
};

struct OptionStrictHelperLaneWaveOps : OptionBase
{
	OptionStrictHelperLaneWaveOps() : OptionBase(Option::StrictHelperLaneWaveOps) { }
	// If true, and WaveOpsIncludeHelperLanes is not set,
	// helper lanes explicitly do not participate in wave ops.
	bool enable = true;
};

struct OptionSubgroupPartitionedNV : OptionBase
{
	OptionSubgroupPartitionedNV() : OptionBase(Option::SubgroupPartitionedNV) { }
	bool supported = false;
};

struct OptionDeadCodeEliminate : OptionBase
{
	OptionDeadCodeEliminate() : OptionBase(Option::DeadCodeEliminate) { }
	bool enabled = false;
};

struct OptionPreciseControl : OptionBase
{
	OptionPreciseControl() : OptionBase(Option::PreciseControl) { }
	bool force_precise = false;
	bool propagate_precise = false;
};

struct OptionSampleGradOptimizationControl : OptionBase
{
	OptionSampleGradOptimizationControl() : OptionBase(Option::SampleGradOptimizationControl) { }
	bool enabled = false;
	bool assume_uniform_scale = false;
};

struct OptionOpacityMicromap : OptionBase
{
	OptionOpacityMicromap() : OptionBase(Option::OpacityMicromap) { }
	bool enabled = false;
};

struct OptionBranchControl : OptionBase
{
	OptionBranchControl() : OptionBase(Option::BranchControl) { }
	bool use_shader_metadata = false;
	bool force_unroll = false;
	bool force_loop = false;
	bool force_flatten = false;
	bool force_branch = false;
};

struct OptionSubgroupProperties : OptionBase
{
	OptionSubgroupProperties() : OptionBase(Option::SubgroupProperties) { }
	unsigned minimum_size = 4;
	unsigned maximum_size = 128;
};

struct OptionDescriptorHeapRobustness : OptionBase
{
	OptionDescriptorHeapRobustness() : OptionBase(Option::DescriptorHeapRobustness) { }
	bool enabled = false;
};

struct OptionComputeShaderDerivativesNV : OptionBase
{
	OptionComputeShaderDerivativesNV() : OptionBase(Option::ComputeShaderDerivativesNV) { }
	// Before this option existed, support was assumed by default.
bool supported = true; }; struct OptionQuadControlReconvergence : OptionBase { OptionQuadControlReconvergence() : OptionBase(Option::QuadControlReconvergence) { } bool supports_quad_control = false; bool supports_maximal_reconvergence = false; bool force_maximal_reconvergence = false; }; struct OptionRawAccessChainsNV : OptionBase { OptionRawAccessChainsNV() : OptionBase(Option::RawAccessChainsNV) { } bool supported = false; }; struct OptionDriverVersion : OptionBase { OptionDriverVersion() : OptionBase(Option::DriverVersion) { } uint32_t driver_id = 0; // Vulkan12Properties::driverID uint32_t driver_version = 0; // PhysicalDeviceProperties::driverVersion }; struct OptionComputeShaderDerivatives : OptionBase { OptionComputeShaderDerivatives() : OptionBase(Option::ComputeShaderDerivatives) { } bool supports_nv = false; bool supports_khr = false; }; struct OptionInstructionInstrumentation : OptionBase { OptionInstructionInstrumentation() : OptionBase(Option::InstructionInstrumentation) { } enum { DefaultVersion = 1 }; bool enabled = false; uint32_t version = DefaultVersion; uint32_t control_desc_set = 0; uint32_t control_binding = 0; uint32_t payload_desc_set = 0; uint32_t payload_binding = 0; uint64_t shader_hash = 0; InstructionInstrumentationType type = {}; }; enum class ShaderQuirk : uint32_t { None = 0, ForceDeviceMemoryBarriersThreadGroupCoherence, AssumeBrokenSub8x8CubeMips, RobustPhysicalCBVForwarding, MeshOutputRobustness, AggressiveNonUniform, RobustPhysicalCBV, PromoteGroupToDeviceMemoryBarrier, GroupSharedAutoBarrier, FixupLoopHeaderUndefPhis, FixupRsqrtInfNan, IgnorePrimitiveShadingRate, RobustComputeQuadBroadcast, PreciseFMA }; struct OptionShaderQuirk : OptionBase { OptionShaderQuirk() : OptionBase(Option::ShaderQuirk) { } ShaderQuirk quirk = ShaderQuirk::None; }; struct OptionExtendedRobustness : OptionBase { OptionExtendedRobustness() : OptionBase(Option::ExtendedRobustness) { } bool robust_group_shared = false; bool robust_alloca = false; bool robust_constant_lut = false; }; struct OptionMaxTessFactor : OptionBase { OptionMaxTessFactor() : OptionBase(Option::MaxTessFactor) { } unsigned max_tess_factor = 0; }; struct OptionVulkanMemoryModel : OptionBase { OptionVulkanMemoryModel() : OptionBase(Option::VulkanMemoryModel) { } bool enabled = false; }; struct OptionFloat8Support : OptionBase { OptionFloat8Support() : OptionBase(Option::Float8Support) { } bool wmma_fp8 = false; bool nv_cooperative_matrix2_conversions = false; }; struct OptionNvAPI : OptionBase { OptionNvAPI() : OptionBase(Option::NvAPI) { } bool enabled = false; unsigned register_index = 0; unsigned register_space = 0; }; struct OptionExtendedNonSemantic : OptionBase { OptionExtendedNonSemantic() : OptionBase(Option::ExtendedNonSemantic) { } bool enabled = false; }; struct OptionViewInstancing : OptionBase { OptionViewInstancing() : OptionBase(Option::ViewInstancing) { } bool enabled = false; bool implicit_viewport_offset = false; bool last_pre_rasterization_stage = false; uint32_t view_index_to_view_instance_spec_id = UINT32_MAX; uint32_t view_instance_to_viewport_spec_id = UINT32_MAX; }; struct OptionMixedDotProduct : OptionBase { OptionMixedDotProduct() : OptionBase(Option::MixedDotProduct) { } bool fp16_fp16_fp32 = false; }; struct OptionComputeShaderDerivativesQuad : OptionBase { OptionComputeShaderDerivativesQuad() : OptionBase(Option::ComputeShaderDerivativesQuad) { } bool supports_quad = false; }; struct DescriptorTableEntry { ResourceClass type; uint32_t register_space; uint32_t register_index; uint32_t 
num_descriptors_in_range;
	uint32_t offset_in_heap;
};

struct NodeDispatchGrid
{
	uint32_t offset;
	DXIL::ComponentType component_type;
	uint32_t count;
};

struct NodeInputData
{
	String node_id;
	uint32_t payload_stride;
	DXIL::NodeLaunchType launch_type;
	uint32_t node_array_index;
	NodeDispatchGrid grid_buffer;
	uint32_t broadcast_grid[3];
	uint32_t thread_group_size_spec_id[3];
	uint32_t max_broadcast_grid_spec_id[3];
	uint32_t recursion_factor;
	uint32_t coalesce_factor;
	String node_share_input_id;
	uint32_t node_share_input_array_index;
	uint32_t local_root_arguments_table_index;
	uint32_t is_indirect_bda_stride_program_entry_spec_id;
	uint32_t is_entry_point_spec_id;
	uint32_t dispatch_grid_is_upper_bound_spec_id;
	uint32_t is_static_broadcast_node_spec_id;
	bool dispatch_grid_is_upper_bound;
	bool node_track_rw_input_sharing;
	bool is_program_entry;
};

struct NodeOutputData
{
	String node_id;
	uint32_t node_array_index;
	uint32_t node_array_size;
	uint32_t node_index_spec_constant_id;
	uint32_t max_records;
	bool sparse_array;
};

struct NonSemanticDebugInfo
{
	const char *tag;
	const void *data;
	size_t size;
};

enum class ShaderFeature
{
	Native16BitOperations = 0,
	Count
};

enum class MetaDescriptor
{
	// u32 containing number of descriptors in CBV_SRV_UAV heap.
	// Must be UBOContainingConstant.
	ResourceDescriptorHeapSize = 0,

	// A BDA pointing to first descriptor payload in resource heap.
	// May point to real descriptors, or only UAV counters depending on driver needs.
	// Stride / offset of pointer is determined by Option::PhysicalAddressDescriptorIndexing.
	// Must be UBOContainingBDA or ReadonlySSBO.
	RawDescriptorHeapView = 1,

	// - u16 ViewID;
	// - u16 LayerOffset;
	// Packed into one u32.
	// Must be UBOContainingConstant.
	DynamicViewInstancingOffsets = 2,

	// - u32 ActiveViewIDMask
	// Must be UBOContainingConstant.
	DynamicViewInstancingMask = 3,

	Count
};

enum class MetaDescriptorKind
{
	Invalid,
	// Currently unused, could be extended as needed.
	PushConstant,
	PushBDA,
	// An UBO containing plain constants.
	// May not be backed by a real descriptor, and be hoisted through some special mechanism.
	UBOContainingConstant,
	// An UBO containing a BDA.
	// May not be backed by a real descriptor, and be hoisted through some special mechanism.
	UBOContainingBDA,
	// An SSBO backed by a real descriptor, i.e. OpArrayLength is valid.
	ReadonlySSBO
};

class Converter
{
public:
	Converter(LLVMBCParser &bitcode_parser, LLVMBCParser *bitcode_reflection_parser, SPIRVModule &module);
	~Converter();
	ConvertedFunction convert_entry_point();
	void set_resource_remapping_interface(ResourceRemappingInterface *iface);
	static ShaderStage get_shader_stage(const LLVMBCParser &bitcode_parser, const char *entry = nullptr);
	static void scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser);
	static Vector<String> get_entry_points(const LLVMBCParser &parser);
	static bool entry_point_matches(const String &mangled, const char *user);
	void set_entry_point(const char *entry);
	const String &get_compiled_entry_point() const;

	void add_option(const OptionBase &cap);
	static bool recognizes_option(Option cap);

	// These are declared separately since we need to declare a concrete physical buffer layout
	// for local root signature elements which depends on the entire local root signature.
	// It would get somewhat awkward to shoehorn this into the resource "pull" API for normal resources.
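	// Hedged caller-side sketch (values hypothetical): declare the local root signature in
	// full, using the methods below, before conversion so the physical record layout can be computed.
	//   converter.add_local_root_constants(/* space */ 0, /* register */ 0, /* num_words */ 4);
	//   converter.add_local_root_descriptor(ResourceClass::SRV, /* space */ 0, /* register */ 1);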
	void add_local_root_constants(uint32_t register_space, uint32_t register_index, uint32_t num_words);
	void add_local_root_descriptor(ResourceClass type, uint32_t register_space, uint32_t register_index);

	// Local root descriptor tables are special. They must be constructed in such a way that
	// the MSB 32 bits can be ignored and the LSB 32 bits are encoded as Index << SBTDescriptorSizeLog2.
	// Thus, we translate GPU VA to index by a simple shift on the lower 32-bit value.
	void add_local_root_descriptor_table(Vector<DescriptorTableEntry> entries);
	void add_local_root_descriptor_table(const DescriptorTableEntry *entries, size_t count);

	// For debug purposes. Makes it possible to map a computed push constant offset
	// back to corresponding root parameter index.
	// Not needed by codegen, but is used by extended debug info.
	void add_root_parameter_mapping(uint32_t root_parameter_index, uint32_t offset);
	static uint32_t pack_desc_set_binding_to_virtual_offset(uint32_t desc_set, uint32_t binding);

	// For debug purposes. Emits arbitrary data with NonSemantic.dxil-spirv.*.
	// Pointers are owned by application and must remain valid until compilation is done.
	void add_non_semantic_debug_info(const NonSemanticDebugInfo &info);

	void set_patch_location_offset(uint32_t offset);

	// After compilation, query CS workgroup size.
	void get_workgroup_dimensions(uint32_t &x, uint32_t &y, uint32_t &z) const;
	// After compilation, query expected patch size.
	uint32_t get_patch_vertex_count() const;
	uint32_t get_patch_location_offset() const;

	// If any of these are non-zero, a CS may have to be compiled for a specific wave size.
	void get_compute_wave_size_range(uint32_t &min, uint32_t &max, uint32_t &preferred) const;
	// If non-zero, similar to required, but can be ignored. Used as a workaround hint or performance hint.
	uint32_t get_compute_heuristic_min_wave_size() const;
	uint32_t get_compute_heuristic_max_wave_size() const;

	// Returns true if view instancing is enabled and the result can be lowered directly to Vulkan.
	bool is_multiview_compatible() const;

	bool shader_requires_feature(ShaderFeature feature) const;

	// For esoteric CFG workarounds.
	bool get_driver_version(uint32_t &driver_id, uint32_t &driver_version) const;

	static NodeInputData get_node_input(const LLVMBCParser &parser, const char *entry);
	static Vector<NodeOutputData> get_node_outputs(const LLVMBCParser &parser, const char *entry);

	String get_analysis_warnings() const;

	void set_meta_descriptor(MetaDescriptor desc, MetaDescriptorKind kind, uint32_t desc_set,
	                         uint32_t binding_or_push_index);

	struct Impl;

private:
	std::unique_ptr<Impl> impl;
};
} // namespace dxil_spv



================================================
FILE: dxil_extract.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "cli_parser.hpp"
#include "dxil_spirv_c.h"
#include "logging.hpp"
// The original angle-bracket includes were lost in extraction; these are reconstructed
// from usage below (FILE I/O, EXIT_* macros, std::string, std::vector).
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

using namespace dxil_spv;

static void print_help()
{
	LOGE("dxil-extract [--output file.bc] [--reflection] [--verbose]\n");
}

static std::vector<uint8_t> read_file(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return {};
	fseek(file, 0, SEEK_END);
	auto len = ftell(file);
	rewind(file);
	std::vector<uint8_t> result(len);
	if (fread(result.data(), 1, len, file) != size_t(len))
	{
		fclose(file);
		return {};
	}
	fclose(file);
	return result;
}

static bool write_file(const char *path, const void *data, size_t size)
{
	bool ret = true;
	FILE *file = fopen(path, "wb");
	if (!file)
		return false;
	if (fwrite(data, 1, size, file) != size)
	{
		LOGE("Failed to write LLVM BC.\n");
		ret = false;
	}
	fclose(file);
	return ret;
}

static uint64_t vkd3d_proton_hash_fnv1(const void *data_, size_t size)
{
	auto *data = static_cast<const uint8_t *>(data_);
	uint64_t h = 0xcbf29ce484222325ull;
	for (size_t i = 0; i < size; i++)
		h = (h * 0x100000001b3ull) ^ data[i];
	return h;
}

int main(int argc, char **argv)
{
	std::string input, output;
	bool reflection = false;
	bool verbose = false;

	CLICallbacks cbs;
	cbs.add("--help", [](CLIParser &parser) {
		print_help();
		parser.end();
	});
	cbs.add("--output", [&](CLIParser &parser) { output = parser.next_string(); });
	cbs.add("--reflection", [&](CLIParser &) { reflection = true; });
	cbs.add("--verbose", [&](CLIParser &) { verbose = true; });
	cbs.default_handler = [&](const char *arg) { input = arg; };
	CLIParser parser(std::move(cbs), argc - 1, argv + 1);
	if (!parser.parse())
		return EXIT_FAILURE;
	else if (parser.is_ended_state())
		return EXIT_SUCCESS;

	if (input.empty())
	{
		LOGE("Need input file.\n");
		return EXIT_FAILURE;
	}

	auto input_file = read_file(input.c_str());
	if (input_file.empty())
	{
		LOGE("Failed to read file %s.\n", input.c_str());
		return EXIT_FAILURE;
	}

	dxil_spv_parsed_blob blob;
	if (reflection)
	{
		dxil_spv_result result;
		if ((result = dxil_spv_parse_reflection_dxil_blob(input_file.data(), input_file.size(), &blob)) !=
		    DXIL_SPV_SUCCESS)
		{
			// Fallback in case there is no STAT block.
			if (result == DXIL_SPV_ERROR_NO_DATA)
			{
				LOGW("There is no STAT block, falling back to normal DXIL block.\n");
				result = dxil_spv_parse_dxil_blob(input_file.data(), input_file.size(), &blob);
			}

			if (result != DXIL_SPV_SUCCESS)
			{
				LOGE("Failed to parse blob.\n");
				return EXIT_FAILURE;
			}
		}
	}
	else
	{
		if (dxil_spv_parse_dxil_blob(input_file.data(), input_file.size(), &blob) != DXIL_SPV_SUCCESS)
		{
			LOGE("Failed to parse blob.\n");
			return EXIT_FAILURE;
		}
	}

	if (verbose)
	{
		printf("=== %s ===\n", input.c_str());
		unsigned entry_point_count = 0;
		dxil_spv_parsed_blob_get_num_entry_points(blob, &entry_point_count);
		for (unsigned i = 0; i < entry_point_count; i++)
		{
			const char *demangled = nullptr;
			dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, i, &demangled);
			printf(" %s\n", demangled);
		}
		printf("vkd3d-proton hash: %016llx\n",
		       static_cast<unsigned long long>(vkd3d_proton_hash_fnv1(input_file.data(), input_file.size())));
		printf("==================\n");
	}

	const void *ir_data;
	size_t ir_size;
	if (dxil_spv_parsed_blob_get_raw_ir(blob, &ir_data, &ir_size) != DXIL_SPV_SUCCESS)
	{
		LOGE("Failed to extract raw IR.\n");
		return EXIT_FAILURE;
	}

	if (output.empty())
	{
		dxil_spv_parsed_blob_dump_llvm_ir(blob);
		dxil_spv_parsed_blob_free(blob);
		return EXIT_SUCCESS;
	}

	if (!write_file(output.c_str(), ir_data, ir_size))
	{
		LOGE("Failed to write IR to %s.\n", output.c_str());
		return EXIT_FAILURE;
	}

	dxil_spv_parsed_blob_free(blob);
}



================================================
FILE: dxil_parser.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "dxil_parser.hpp"
#include "dxil.hpp"
#include "memory_stream.hpp"
#include "logging.hpp"
// Angle-bracket includes were lost in extraction; reconstructed from usage
// (strchr, offsetof, std::move).
#include <cstring>
#include <cstddef>
#include <utility>

namespace dxil_spv
{
bool is_mangled_entry_point(const char *user)
{
	// The mangling algorithm is intentionally left undefined in spec.
	// However, the mangling scheme clearly follows MSVC here.
	// The format we're looking for is:
	// ?<entry point name>@<mangled signature>.
	// http://www.agner.org/optimize/calling_conventions.pdf (section 8.1).
	// DXC also seems to start with '\01', but we can ignore that.
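	// Worked example (hedged, name hypothetical): "\01?MyMiss@@YAXXZ" contains '?'
	// followed by "MyMiss" and then '@', so this function returns true, and
	// demangle_entry_point() below extracts "MyMiss".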
const char *mangle_begin = strchr(user, '?'); if (!mangle_begin) return false; const char *mangle_end = strchr(mangle_begin + 1, '@'); return mangle_end != nullptr; } String demangle_entry_point(const String &entry) { auto start_idx = entry.find_first_of('?'); if (start_idx == std::string::npos) return entry; start_idx++; auto end_idx = entry.find_first_of('@', start_idx); if (end_idx == std::string::npos) return entry; return entry.substr(start_idx, end_idx - start_idx); } Vector &DXILContainerParser::get_blob() { return dxil_blob; } Vector &DXILContainerParser::get_rdat_subobjects() { return rdat_subobjects; } bool DXILContainerParser::parse_dxil(MemoryStream &stream) { DXIL::ProgramHeader program_header; if (!stream.read(program_header)) return false; if (static_cast(program_header.dxil_magic) != DXIL::FourCC::DXIL) return false; constexpr uint32_t DxilMagicPad = sizeof(DXIL::ProgramHeader) - offsetof(DXIL::ProgramHeader, dxil_magic); if (program_header.bitcode_offset < DxilMagicPad) return false; auto substream = stream.create_substream_bitcode_size( stream.get_offset() + program_header.bitcode_offset - DxilMagicPad, program_header.bitcode_size); dxil_blob.resize(substream.get_size()); if (!substream.read(dxil_blob.data(), substream.get_size())) return false; return true; } bool DXILContainerParser::parse_iosg1(MemoryStream &stream, Vector &elements) { uint32_t element_count; if (!stream.read(element_count)) return false; if (!stream.skip(sizeof(uint32_t))) return false; elements.resize(element_count); for (uint32_t i = 0; i < element_count; i++) { if (!stream.read(elements[i].stream_index)) return false; uint32_t string_offset; if (!stream.read(string_offset)) return false; if (!stream.read(elements[i].semantic_index)) return false; if (!stream.read(elements[i].system_value_semantic)) return false; if (!stream.read(elements[i].component_type)) return false; if (!stream.read(elements[i].register_index)) return false; if (!stream.read(elements[i].mask)) return false; if (!stream.read(elements[i].min_precision)) return false; size_t offset = stream.get_offset(); if (!stream.seek(string_offset)) return false; const char *semantic_name; if (!stream.map_string_iterate(semantic_name)) return false; elements[i].semantic_name = semantic_name; if (!stream.seek(offset)) return false; } return true; } bool DXILContainerParser::parse_rdat(MemoryStream &stream) { uint32_t version, part_count; if (!stream.read(version)) return false; if (!stream.read(part_count)) return false; constexpr uint32_t RDAT_Version = 0x10; if (version != RDAT_Version) return false; Vector offsets(part_count); for (uint32_t i = 0; i < part_count; i++) if (!stream.read(offsets[i])) return false; MemoryStream string_buffer; MemoryStream index_buffer; MemoryStream raw_bytes; for (uint32_t i = 0; i < part_count; i++) { if (offsets[i] + 2 * sizeof(uint32_t) > stream.get_size()) return false; uint32_t part_size = i + 1 < part_count ? 
(offsets[i + 1] - offsets[i]) : uint32_t(stream.get_size() - offsets[i]); auto substream = stream.create_substream(offsets[i], part_size); DXIL::RuntimeDataPartType type; if (!substream.read(type)) return false; uint32_t subpart_length; if (!substream.read(subpart_length)) return false; if (subpart_length + 2 * sizeof(uint32_t) > substream.get_size()) return false; switch (type) { case DXIL::RuntimeDataPartType::StringBuffer: { string_buffer = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::IndexArrays: { index_buffer = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::RawBytes: { raw_bytes = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::SubobjectTable: { uint32_t record_count; uint32_t record_stride; if (!substream.read(record_count)) return false; if (!substream.read(record_stride)) return false; for (unsigned record = 0; record < record_count; record++) { auto record_stream = substream.create_substream(substream.get_offset() + record * record_stride, record_stride); DXIL::SubobjectKind kind; if (!record_stream.read(kind)) return false; switch (kind) { case DXIL::SubobjectKind::StateObjectConfig: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str = nullptr; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t flag; if (!record_stream.read(flag)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = flag; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::RaytracingShaderConfig: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t max_payload_size, max_attribute_size; if (!record_stream.read(max_payload_size)) return false; if (!record_stream.read(max_attribute_size)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = max_payload_size; elem.args[1] = max_attribute_size; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::RaytracingPipelineConfig: case DXIL::SubobjectKind::RaytracingPipelineConfig1: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t max_recursion_depth; uint32_t flags = 0; if (!record_stream.read(max_recursion_depth)) return false; if (kind == DXIL::SubobjectKind::RaytracingPipelineConfig1) if (!record_stream.read(flags)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = max_recursion_depth; elem.args[1] = flags; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::HitGroup: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *hg_name; if (!string_buffer.map_string_absolute(hg_name, name_offset)) return false; DXIL::HitGroupType hit_group_type; if (!record_stream.read(hit_group_type)) return false; uint32_t ahit_name_offset, chit_name_offset, intersection_name_offset; if (!record_stream.read(ahit_name_offset)) return false; if (!record_stream.read(chit_name_offset)) return false; if (!record_stream.read(intersection_name_offset)) return false; const char *ahit, *chit, *intersection; if (!string_buffer.map_string_absolute(ahit, 
				case DXIL::SubobjectKind::HitGroup:
				{
					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *hg_name;
					if (!string_buffer.map_string_absolute(hg_name, name_offset))
						return false;
					DXIL::HitGroupType hit_group_type;
					if (!record_stream.read(hit_group_type))
						return false;
					uint32_t ahit_name_offset, chit_name_offset, intersection_name_offset;
					if (!record_stream.read(ahit_name_offset))
						return false;
					if (!record_stream.read(chit_name_offset))
						return false;
					if (!record_stream.read(intersection_name_offset))
						return false;
					const char *ahit, *chit, *intersection;
					if (!string_buffer.map_string_absolute(ahit, ahit_name_offset))
						return false;
					if (!string_buffer.map_string_absolute(chit, chit_name_offset))
						return false;
					if (!string_buffer.map_string_absolute(intersection, intersection_name_offset))
						return false;

					RDATSubobject elem = {};
					elem.kind = kind;
					elem.subobject_name = hg_name;
					elem.hit_group_type = hit_group_type;
					elem.exports = { ahit, chit, intersection };
					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				case DXIL::SubobjectKind::SubobjectToExportsAssociation:
				{
					RDATSubobject elem = {};
					elem.kind = kind;

					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *name;
					if (!string_buffer.map_string_absolute(name, name_offset))
						return false;
					elem.subobject_name = name;

					if (!record_stream.read(name_offset))
						return false;
					const char *object_name;
					if (!string_buffer.map_string_absolute(object_name, name_offset))
						return false;
					elem.exports.push_back(object_name);

					uint32_t index_offset;
					if (!record_stream.read(index_offset))
						return false;
					auto index_substream = index_buffer.create_substream(sizeof(uint32_t) * index_offset);
					uint32_t count;
					if (!index_substream.read(count))
						return false;
					for (uint32_t export_index = 0; export_index < count; export_index++)
					{
						if (!index_substream.read(name_offset))
							return false;
						if (!string_buffer.map_string_absolute(object_name, name_offset))
							return false;
						elem.exports.push_back(object_name);
					}

					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				case DXIL::SubobjectKind::GlobalRootSignature:
				case DXIL::SubobjectKind::LocalRootSignature:
				{
					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *name;
					if (!string_buffer.map_string_absolute(name, name_offset))
						return false;
					uint32_t byte_offset;
					uint32_t byte_size;
					if (!record_stream.read(byte_offset))
						return false;
					if (!record_stream.read(byte_size))
						return false;
					auto name_substream = raw_bytes.create_substream(byte_offset, byte_size);
					auto *data = name_substream.map_read(byte_size);

					RDATSubobject elem = {};
					elem.kind = kind;
					elem.subobject_name = name;
					elem.payload = data;
					elem.payload_size = byte_size;
					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				default:
					break;
				}
			}
			break;
		}

		default:
			break;
		}
	}

	return true;
}
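// Editor's note (sketch, not upstream code): parse_container() below walks the
// standard DXBC/DXIL container layout, a FourCC-tagged part list: one
// ContainerHeader, then part_count uint32_t offsets, each pointing at a
// PartHeader { part_fourcc, part_size } followed by the part payload.
#if 0
static void dump_part_fourccs(const void *data, size_t size)
{
	MemoryStream stream(data, size);
	DXIL::ContainerHeader header;
	if (!stream.read(header))
		return;
	Vector<uint32_t> offsets(header.part_count);
	for (auto &offset : offsets)
		if (!stream.read(offset))
			return;
	for (auto &offset : offsets)
	{
		DXIL::PartHeader part;
		if (!stream.seek(offset) || !stream.read(part))
			return;
		// part_fourcc is four ASCII characters packed into a uint32_t.
		LOGE("Part 0x%08x, %u bytes\n", unsigned(part.part_fourcc), unsigned(part.part_size));
	}
}
#endif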
bool DXILContainerParser::parse_container(const void *data, size_t size, bool reflection)
{
	MemoryStream stream(data, size);

	DXIL::ContainerHeader container_header;
	if (!stream.read(container_header))
		return false;
	if (static_cast<DXIL::FourCC>(container_header.header_fourcc) != DXIL::FourCC::Container)
		return false;
	if (container_header.container_size_in_bytes > size)
		return false;

	Vector<uint32_t> parts(container_header.part_count);
	for (uint32_t i = 0; i < container_header.part_count; i++)
	{
		if (!stream.read(parts[i]))
			return false;
	}

	for (auto &part_offset : parts)
	{
		if (!stream.seek(part_offset))
			return false;
		DXIL::PartHeader part_header;
		if (!stream.read(part_header))
			return false;
		auto fourcc = static_cast<DXIL::FourCC>(part_header.part_fourcc);
		if (fourcc == DXIL::FourCC::SHDR || fourcc == DXIL::FourCC::SHEX)
			dxbc_binary = true;
	}

	for (auto &part_offset : parts)
	{
		if (!stream.seek(part_offset))
			return false;
		DXIL::PartHeader part_header;
		if (!stream.read(part_header))
			return false;

		auto fourcc = static_cast<DXIL::FourCC>(part_header.part_fourcc);
		switch (fourcc)
		{
		case DXIL::FourCC::DXIL:
		case DXIL::FourCC::ShaderStatistics:
		{
			DXIL::FourCC expected = reflection ? DXIL::FourCC::ShaderStatistics : DXIL::FourCC::DXIL;
			if (expected != fourcc || dxbc_binary)
				break;
			// The STAT block includes a DXIL blob that is literally the same DXIL IR,
			// minus code, plus string names in the metadata chunks.
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_dxil(substream))
				return false;
			break;
		}

		case DXIL::FourCC::FeatureInfo:
			break;

		case DXIL::FourCC::InputSignature:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_iosg1(substream, input_elements))
				return false;
			break;
		}

		case DXIL::FourCC::OutputSignature:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_iosg1(substream, output_elements))
				return false;
			break;
		}

		case DXIL::FourCC::PatchConstantSignature:
			break;
		case DXIL::FourCC::PrivateData:
			break;
		case DXIL::FourCC::RootSignature:
			break;
		case DXIL::FourCC::PipelineStateValidation:
			break;
		case DXIL::FourCC::ResourceDef:
			break;
		case DXIL::FourCC::ShaderHash:
			break;

		case DXIL::FourCC::RuntimeData:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_rdat(substream))
				return false;
			break;
		}

		default:
			break;
		}
	}

	return true;
}
} // namespace dxil_spv


================================================
FILE: dxil_parser.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include "dxil.hpp"
#include <stddef.h>
#include <stdint.h>

namespace dxil_spv
{
class MemoryStream;

struct RDATSubobject
{
	// All strings point directly to the DXBC blob and the pointers are not owned.
	DXIL::SubobjectKind kind;
	// All subobjects have a variable name as declared in the shader.
	const char *subobject_name;
	// All exports.
	// For hit groups, 3 strings: AnyHit, ClosestHit, Intersection. Strings may be empty if not used.
	// For SubobjectToExportsAssociation: N strings. exports[0] is associated with the following exports.
	DXIL::HitGroupType hit_group_type;
	Vector<const char *> exports;
	// For StateObjectConfig, RaytracingShaderConfig, RaytracingPipelineConfig(1).
	// Each element is in struct order.
	uint32_t args[2];
	// For Global/Local Root Signatures.
	const uint8_t *payload;
	size_t payload_size;
};

class DXILContainerParser
{
public:
	bool parse_container(const void *data, size_t size, bool reflection);
	Vector<uint8_t> &get_blob();
	Vector<RDATSubobject> &get_rdat_subobjects();
	bool is_dxbc_binary() const
	{
		return dxbc_binary;
	}

private:
	Vector<uint8_t> dxil_blob;
	Vector<IOSGElement> input_elements;
	Vector<IOSGElement> output_elements;
	Vector<RDATSubobject> rdat_subobjects;
	bool dxbc_binary = false;

	bool parse_dxil(MemoryStream &stream);
	bool parse_iosg1(MemoryStream &stream, Vector<IOSGElement> &elements);
	bool parse_rdat(MemoryStream &stream);
};

bool is_mangled_entry_point(const char *user);
String demangle_entry_point(const String &entry);
} // namespace dxil_spv
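// Example (editor sketch): typical use of DXILContainerParser from client
// code. The surrounding function is hypothetical, not part of this repository.
#if 0
static bool extract_bitcode(const void *dxbc, size_t size, dxil_spv::Vector<uint8_t> &bitcode)
{
	dxil_spv::DXILContainerParser parser;
	// reflection = false picks the DXIL part; true picks the STAT part, which
	// carries the same IR with reflection names preserved.
	if (!parser.parse_container(dxbc, size, false))
		return false;
	bitcode = parser.get_blob(); // Raw LLVM bitcode, ready for the BC parser.
	return !bitcode.empty();
}
#endif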


================================================
FILE: dxil_spirv.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <algorithm>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
#define DXIL_SPV_ENABLE_EXPERIMENTAL_MULTIVIEW
#include "dxil_spirv_c.h"
#include "cli_parser.hpp"
#include "logging.hpp"
#include "spirv-tools/libspirv.hpp"
#include "spirv_cross_c.h"

using namespace dxil_spv;

static std::string convert_to_asm(const void *code, size_t size)
{
	spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
	tools.SetMessageConsumer([](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
		LOGE("SPIRV-Tools message: %s\n", message);
	});
	std::string str;
	if (!tools.Disassemble(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), &str, 0))
		return "";
	else
		return str;
}

static bool validate_spirv(const void *code, size_t size)
{
	spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
	bool expected_failure = false;
	bool unexpected_failure = false;
	tools.SetMessageConsumer([&](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
		if (strstr(message, "08721") || strstr(message, "08722"))
		{
			LOGW("SPIRV-Tools message expected failure: %s\n", message);
			expected_failure = true;
		}
		else
		{
			LOGE("SPIRV-Tools message: %s\n", message);
			unexpected_failure = true;
		}
	});
	spvtools::ValidatorOptions opts;
	opts.SetScalarBlockLayout(true);
	return tools.Validate(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), opts) ||
	       (expected_failure && !unexpected_failure);
}

static std::string convert_to_glsl(const void *code, size_t size)
{
	std::string ret;
	spvc_context context;
	if (spvc_context_create(&context) != SPVC_SUCCESS)
		return ret;

	spvc_parsed_ir ir;
	if (spvc_context_parse_spirv(context, static_cast<const SpvId *>(code), size / sizeof(uint32_t), &ir) != SPVC_SUCCESS)
		goto cleanup;

	spvc_compiler compiler;
	if (spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler) != SPVC_SUCCESS)
		goto cleanup;

	spvc_compiler_options opts;
	if (spvc_compiler_create_compiler_options(compiler, &opts) != SPVC_SUCCESS)
		goto cleanup;
	spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE);
	spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_GLSL_VERSION, 460);
	spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS, SPVC_TRUE);
	spvc_compiler_install_compiler_options(compiler, opts);

	const char *source;
	if (spvc_compiler_compile(compiler, &source) != SPVC_SUCCESS)
		goto cleanup;
	ret = source;

cleanup:
	spvc_context_destroy(context);
	return ret;
}

static std::vector<uint8_t> read_file(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return {};

	fseek(file, 0, SEEK_END);
	auto len = ftell(file);
	rewind(file);
	std::vector<uint8_t> result(len);
	if (fread(result.data(), 1, len, file) != size_t(len))
	{
		fclose(file);
		return {};
	}
	fclose(file);
	return result;
}
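// Editor sketch (hypothetical driver, illustrative only): how the static
// helpers above compose into a validate-then-disassemble pipeline, which is
// what main() does further down.
#if 0
static bool show_disassembly(const std::vector<uint8_t> &spirv)
{
	if (!validate_spirv(spirv.data(), spirv.size()))
		return false;
	std::string text = convert_to_asm(spirv.data(), spirv.size());
	printf("%s\n", text.c_str());
	return !text.empty();
}
#endif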
"\t[--typed-uav-read-without-format]\n" "\t[--bindless-typed-buffer-offsets]\n" "\t[--output-rt-swizzle index xyzw]\n" "\t[--bindless-offset-buffer-layout ]\n" "\t[--storage-input-output-16bit]\n" "\t[--root-descriptor ]\n" "\t[--descriptor-qa ]\n" "\t[--instruction-instrumentation ]\n" "\t[--min-precision-native-16bit]\n" "\t[--raw-llvm]\n" "\t[--use-reflection-names]\n" "\t[--invariant-position]\n" "\t[--robust-physical-cbv-load]\n" "\t[--allow-arithmetic-relaxed-precision]\n" "\t[--physical-address-descriptor-indexing ]\n" "\t[--nvapi ]\n" "\t[--subgroup-partitioned-nv]\n" "\t[--dead-code-eliminate]\n" "\t[--propagate-precise]\n" "\t[--force-precise]\n" "\t[--force-flatten]\n" "\t[--force-loop]\n" "\t[--force-branch]\n" "\t[--force-unroll]\n" "\t[--subgroup-size minimum maximum]\n" "\t[--descriptor-heap-robustness]\n" "\t[--no-compute-shader-derivatives]\n" "\t[--quad-control-maximal-reconvergence]\n" "\t[--force-maximal-reconvergence]\n" "\t[--raw-access-chains-nv]\n" "\t[--extended-robustness]\n" "\t[--vkmm]\n" "\t[--full-wmma ]\n" "\t[--shader-quirk ]\n" "\t[--non-semantic]\n" "\t[--mixed-float-dot-product]\n" "\t[--view-instancing]\n" "\t[--view-instancing-last-pre-rasterization-stage]\n" "\t[--view-instance-to-viewport-spec-id ]\n" "\t[--view-index-to-view-instance-spec-id ]\n" "\t[--meta-descriptor descriptor kind set binding]\n"); } struct MetaDescriptor { dxil_spv_meta_descriptor meta; dxil_spv_meta_descriptor_kind kind; uint32_t desc_set; uint32_t desc_binding; }; struct Arguments { std::string input_path; std::string output_path; std::string entry_point; bool dump_module = false; bool glsl = false; bool emit_asm = false; bool validate = false; bool shader_demote = false; bool shader_i8_dot = false; bool dual_source_blending = false; bool debug_all_entry_points = false; bool storage_input_output_16bit = false; std::vector swizzles; unsigned root_constant_inline_ubo_desc_set = 0; unsigned root_constant_inline_ubo_binding = 0; bool root_constant_inline_ubo = false; bool bindless_cbv_as_ssbo = false; bool typed_uav_read_without_format = false; bool bindless_typed_buffer_offsets = false; bool min_precision_native_16bit = false; bool raw_llvm = false; bool use_reflection_names = false; bool invariant_position = false; bool robust_physical_cbv_load = false; bool allow_arithmetic_relaxed_precision = false; bool subgroup_partitioned_nv = false; bool dead_code_eliminate = false; bool propagate_precise = false; bool force_precise = false; bool opacity_micromap = false; bool force_flatten = false; bool force_loop = false; bool force_branch = false; bool force_unroll = false; bool descriptor_heap_robustness = false; bool compute_shader_derivatives = true; bool quad_control_maximal_reconvergence = false; bool force_maximal_reconvergence = false; bool raw_access_chains_nv = false; bool extended_robustness = false; bool vkmm = false; bool wmma_fp8 = false; bool wmma_nv_coopmat2 = false; bool non_semantic = false; bool mixed_float_dot_product = false; std::vector quirks; unsigned ssbo_alignment = 1; unsigned physical_address_indexing_stride = 1; unsigned physical_address_indexing_offset = 0; unsigned subgroup_size_minimum = 4; unsigned subgroup_size_maximum = 128; bool descriptor_qa = false; uint32_t descriptor_qa_set = 0; uint32_t descriptor_qa_binding = 0; bool nvapi = false; unsigned nvapi_register_index = 0; unsigned nvapi_register_space = 0; bool instruction_instrumentation = false; uint32_t instruction_instrumentation_set = 0; uint32_t instruction_instrumentation_binding = 0; 
struct Remapper
{
	struct RootConstant
	{
		unsigned register_space;
		unsigned register_index;
		unsigned word_offset;
	};

	struct RootDescriptor
	{
		dxil_spv_resource_class resource_class;
		uint32_t space;
		uint32_t register_index;
	};

	std::vector<RootConstant> root_constants;
	unsigned root_constant_word_count = 0;
	std::vector<RootDescriptor> root_descriptors;

	struct VertexInput
	{
		std::string semantic;
		unsigned index;
	};
	std::vector<VertexInput> vertex_inputs;

	struct StreamOutput
	{
		std::string semantic;
		unsigned index;
		unsigned offset;
		unsigned stride;
		unsigned buffer_index;
	};
	std::vector<StreamOutput> stream_outputs;

	bool bindless = false;
	bool bda = true;
	bool uav_counter_force_texel_buffer = false;
	bool uav_counter_force_ssbo = false;
	bool ssbo_uav = false;
	bool ssbo_srv = false;
	bool ssbo_rtas = false;
	bool input_attachments = false;
};

static bool kind_is_buffer(dxil_spv_resource_kind kind)
{
	return kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER ||
	       kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
	       kind == DXIL_SPV_RESOURCE_KIND_TYPED_BUFFER;
}

static int32_t find_root_descriptor_index(const Remapper *remapper, const dxil_spv_d3d_binding *binding,
                                          dxil_spv_resource_class resource_class)
{
	auto itr = std::find_if(remapper->root_descriptors.begin(), remapper->root_descriptors.end(),
	                        [&](const Remapper::RootDescriptor &desc) {
		                        return desc.resource_class == resource_class &&
		                               desc.space == binding->register_space &&
		                               desc.register_index == binding->register_index;
	                        });

	if (itr != remapper->root_descriptors.end())
		return int32_t(itr - remapper->root_descriptors.begin());
	else
		return -1;
}

static bool d3d_binding_is_global_heap(const dxil_spv_d3d_binding &binding)
{
	return binding.register_index == UINT32_MAX &&
	       binding.register_space == UINT32_MAX &&
	       binding.range_size == UINT32_MAX;
}
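// Editor's note (derived from the check above): a "global heap" binding
// appears to correspond to SM 6.6 direct descriptor heap access
// (ResourceDescriptorHeap[] / SamplerDescriptorHeap[]); no register range
// exists, so all register fields are UINT32_MAX. Hypothetical illustration:
#if 0
static void global_heap_example()
{
	dxil_spv_d3d_binding binding = {};
	binding.register_index = UINT32_MAX;
	binding.register_space = UINT32_MAX;
	binding.range_size = UINT32_MAX;
	assert(d3d_binding_is_global_heap(binding));
}
#endif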
static dxil_spv_bool remap_srv(void *userdata, const dxil_spv_d3d_binding *binding,
                               dxil_spv_srv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, binding, DXIL_SPV_RESOURCE_CLASS_SRV);
	if (desc_index >= 0)
	{
		vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->buffer_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		bool is_global_heap = d3d_binding_is_global_heap(*binding);
		if (is_global_heap)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.set = 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else if (remapper->bindless)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.bindless.heap_root_offset = binding->register_index;
			vk_binding->buffer_binding.root_constant_index = kind_is_buffer(binding->kind) ? 1 : 0;
			vk_binding->buffer_binding.set = kind_is_buffer(binding->kind) ? 1 : 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.set = binding->register_space;
			vk_binding->buffer_binding.binding = binding->register_index;
		}

		if (binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE)
			if ((remapper->bindless || is_global_heap) && remapper->ssbo_rtas)
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;

		if (remapper->input_attachments &&
		    (binding->register_space == 1000 || binding->register_space == 1001) &&
		    (binding->kind == DXIL_SPV_RESOURCE_KIND_TEXTURE_2D ||
		     binding->kind == DXIL_SPV_RESOURCE_KIND_TEXTURE_2DMS))
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
			vk_binding->buffer_binding.input_attachment_index =
			    binding->register_space == 1000 ? binding->register_index : -1u;
		}

		if (remapper->ssbo_srv)
		{
			if (binding->kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
			    binding->kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER)
			{
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
			}
		}

		// In case it's needed, place offset buffer here.
		vk_binding->offset_binding.set = 15;
		vk_binding->offset_binding.binding = 0;
	}

	return DXIL_SPV_TRUE;
}

static dxil_spv_bool remap_sampler(void *userdata, const dxil_spv_d3d_binding *binding,
                                   dxil_spv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	if (d3d_binding_is_global_heap(*binding))
	{
		vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
		vk_binding->set = 0;
		vk_binding->binding = 0;
	}
	else if (remapper->bindless)
	{
		vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
		vk_binding->bindless.heap_root_offset = binding->register_index;
		vk_binding->root_constant_index = 2;
		vk_binding->set = 2;
		vk_binding->binding = 0;
	}
	else
	{
		vk_binding->bindless.use_heap = DXIL_SPV_FALSE;
		vk_binding->set = binding->register_space;
		vk_binding->binding = binding->register_index;
	}

	return DXIL_SPV_TRUE;
}
static dxil_spv_bool remap_uav(void *userdata, const dxil_spv_uav_d3d_binding *binding,
                               dxil_spv_uav_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, &binding->d3d_binding, DXIL_SPV_RESOURCE_CLASS_UAV);
	if (desc_index >= 0)
	{
		vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->buffer_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		bool binding_is_global_heap = d3d_binding_is_global_heap(binding->d3d_binding);
		if (binding_is_global_heap)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.set = 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else if (remapper->bindless)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.bindless.heap_root_offset = binding->d3d_binding.register_index;
			vk_binding->buffer_binding.root_constant_index = kind_is_buffer(binding->d3d_binding.kind) ? 4 : 3;
			vk_binding->buffer_binding.set = kind_is_buffer(binding->d3d_binding.kind) ? 4 : 3;
			vk_binding->buffer_binding.binding = 0;
		}
		else
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.set = binding->d3d_binding.register_space;
			vk_binding->buffer_binding.binding = binding->d3d_binding.register_index;
		}

		if (remapper->ssbo_uav)
		{
			if (binding->d3d_binding.kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
			    binding->d3d_binding.kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER)
			{
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
			}
		}

		vk_binding->offset_binding.set = 15;
		vk_binding->offset_binding.binding = 0;

		if (binding->has_counter)
		{
			if (remapper->bindless || binding_is_global_heap)
			{
				vk_binding->counter_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->counter_binding.root_constant_index = 4;
				vk_binding->counter_binding.bindless.heap_root_offset = binding->d3d_binding.register_index;
				vk_binding->counter_binding.set = 7;
				vk_binding->counter_binding.binding = 0;
			}
			else
			{
				vk_binding->counter_binding.bindless.use_heap = DXIL_SPV_FALSE;
				vk_binding->counter_binding.set = 7;
				vk_binding->counter_binding.binding = binding->d3d_binding.resource_index;
			}

			if (remapper->uav_counter_force_texel_buffer)
				vk_binding->counter_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER;
			else if (remapper->uav_counter_force_ssbo)
				vk_binding->counter_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
		}
	}

	return DXIL_SPV_TRUE;
}

static dxil_spv_bool remap_cbv(void *userdata, const dxil_spv_d3d_binding *binding,
                               dxil_spv_cbv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, binding, DXIL_SPV_RESOURCE_CLASS_CBV);
	if (desc_index >= 0)
	{
		vk_binding->push_constant = DXIL_SPV_FALSE;
		vk_binding->vulkan.uniform_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->vulkan.uniform_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		auto itr = std::find_if(remapper->root_constants.begin(), remapper->root_constants.end(),
		                        [&](const Remapper::RootConstant &root) {
			                        return root.register_space == binding->register_space &&
			                               root.register_index == binding->register_index;
		                        });

		if (itr != remapper->root_constants.end())
		{
			vk_binding->push_constant = DXIL_SPV_TRUE;
			vk_binding->vulkan.push_constant.offset_in_words = itr->word_offset;
		}
		else
		{
			if (d3d_binding_is_global_heap(*binding))
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->vulkan.uniform_binding.set = 0;
				vk_binding->vulkan.uniform_binding.binding = 0;
			}
			else if (remapper->bindless)
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->vulkan.uniform_binding.bindless.heap_root_offset = binding->register_index;
				vk_binding->vulkan.uniform_binding.root_constant_index = 5;
				vk_binding->vulkan.uniform_binding.set = 5;
				vk_binding->vulkan.uniform_binding.binding = 0;
			}
			else
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_FALSE;
				vk_binding->vulkan.uniform_binding.set = binding->register_space;
				vk_binding->vulkan.uniform_binding.binding = binding->register_index;
			}
		}
	}

	return DXIL_SPV_TRUE;
}
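// Editor's note: descriptor set convention implemented by the remappers above
// when --bindless is used (derived from the code, not normative):
//   set 0:  SRV images        (root constant index 0)
//   set 1:  SRV buffers       (root constant index 1)
//   set 2:  samplers          (root constant index 2)
//   set 3:  UAV images        (root constant index 3)
//   set 4:  UAV buffers       (root constant index 4)
//   set 5:  CBVs              (root constant index 5)
//   set 7:  UAV counters      (root constant index 4)
//   set 15: offset buffer, binding 0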
static dxil_spv_bool remap_vertex_input(void *userdata, const dxil_spv_d3d_vertex_input *d3d_input,
                                        dxil_spv_vulkan_vertex_input *vk_input)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	auto itr = std::find_if(remapper->vertex_inputs.begin(), remapper->vertex_inputs.end(),
	                        [&](const Remapper::VertexInput &vin) { return vin.semantic == d3d_input->semantic; });

	if (itr != remapper->vertex_inputs.end())
		vk_input->location = itr->index + d3d_input->semantic_index;
	else
		vk_input->location = d3d_input->start_row;

	return DXIL_SPV_TRUE;
}

#ifdef _MSC_VER
#define strcasecmp _stricmp
#endif

static dxil_spv_bool remap_stream_output(void *userdata, const dxil_spv_d3d_stream_output *d3d_output,
                                         dxil_spv_vulkan_stream_output *vk_output)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	auto itr = std::find_if(remapper->stream_outputs.begin(), remapper->stream_outputs.end(),
	                        [&](const Remapper::StreamOutput &vin) {
		                        return strcasecmp(vin.semantic.c_str(), d3d_output->semantic) == 0 &&
		                               vin.index == d3d_output->semantic_index;
	                        });

	if (itr != remapper->stream_outputs.end())
	{
		vk_output->enable = DXIL_SPV_TRUE;
		vk_output->offset = itr->offset;
		vk_output->stride = itr->stride;
		vk_output->buffer_index = itr->buffer_index;
	}
	else
		*vk_output = {};

	return DXIL_SPV_TRUE;
}

int main(int argc, char **argv)
{
	Arguments args;
	Remapper remapper;
	bool local_root_signature = false;
	dxil_spv_begin_thread_allocator_context();

	args.offset_buffer_layout.base.type = DXIL_SPV_OPTION_BINDLESS_OFFSET_BUFFER_LAYOUT;
	args.offset_buffer_layout.untyped_offset = 0;
	args.offset_buffer_layout.typed_offset = 0;
	args.offset_buffer_layout.stride = 1;

	// Begin with identity swizzles.
	args.swizzles.resize(8, 0 | (1 << 2) | (2 << 4) | (3 << 6));

	CLICallbacks cbs;
	cbs.add("--help", [](CLIParser &parser) {
		print_help();
		parser.end();
	});
	cbs.add("--dump-module", [&](CLIParser &) { args.dump_module = true; });
	cbs.add("--glsl", [&](CLIParser &) { args.glsl = true; });
	cbs.add("--asm", [&](CLIParser &) { args.emit_asm = true; });
	cbs.add("--validate", [&](CLIParser &) { args.validate = true; });
	cbs.add("--output", [&](CLIParser &parser) { args.output_path = parser.next_string(); });
	cbs.add("--root-constant", [&](CLIParser &parser) {
		Remapper::RootConstant root = {};
		root.register_space = parser.next_uint();
		root.register_index = parser.next_uint();
		root.word_offset = parser.next_uint();
		unsigned word_count = parser.next_uint();
		remapper.root_constant_word_count = std::max(remapper.root_constant_word_count, word_count + root.word_offset);
		remapper.root_constants.push_back(root);
	});
	cbs.add("--vertex-input", [&](CLIParser &parser) {
		const char *sem = parser.next_string();
		unsigned loc = parser.next_uint();
		remapper.vertex_inputs.push_back({ std::string(sem), loc });
	});
	cbs.add("--stream-output", [&](CLIParser &parser) {
		const char *sem = parser.next_string();
		unsigned index = parser.next_uint();
		unsigned offset = parser.next_uint();
		unsigned stride = parser.next_uint();
		unsigned buffer_index = parser.next_uint();
		remapper.stream_outputs.push_back({ std::string(sem), index, offset, stride, buffer_index });
	});
	cbs.add("--enable-shader-demote", [&](CLIParser &) { args.shader_demote = true; });
	cbs.add("--enable-shader-i8-dot", [&](CLIParser &parser) { args.shader_i8_dot = true; });
	cbs.add("--enable-dual-source-blending", [&](CLIParser &) { args.dual_source_blending = true; });
	cbs.add("--bindless", [&](CLIParser &) {
		remapper.bindless = true;
		remapper.root_constant_word_count = std::max(remapper.root_constant_word_count, 8u);
	});
	cbs.add("--no-bda", [&](CLIParser &) { remapper.bda = false; });
	cbs.add("--uav-counter-force-texel-buffer", [&](CLIParser &) { remapper.uav_counter_force_texel_buffer = true; });
	cbs.add("--uav-counter-force-ssbo", [&](CLIParser &) { remapper.uav_counter_force_ssbo = true; });
	cbs.add("--local-root-signature", [&](CLIParser &) { local_root_signature = true; });
cbs.add("--root-descriptor", [&](CLIParser &parser) { const char *tag = parser.next_string(); uint32_t space = parser.next_uint(); uint32_t register_index = parser.next_uint(); dxil_spv_resource_class resource_class; if (!strcmp(tag, "cbv")) resource_class = DXIL_SPV_RESOURCE_CLASS_CBV; else if (!strcmp(tag, "uav")) resource_class = DXIL_SPV_RESOURCE_CLASS_UAV; else if (!strcmp(tag, "srv")) resource_class = DXIL_SPV_RESOURCE_CLASS_SRV; else { LOGE("Invalid resource class %s, ignoring.\n", tag); return; } remapper.root_descriptors.push_back({ resource_class, space, register_index }); }); cbs.add("--output-rt-swizzle", [&](CLIParser &parser) { unsigned index = parser.next_uint(); if (index >= args.swizzles.size()) { LOGE("RT index out of range.\n"); print_help(); parser.end(); return; } const char *arg = parser.next_string(); if (strlen(arg) != 4) { LOGE("RT swizzle must be 4 characters (x, y, z, w).\n"); print_help(); parser.end(); return; } auto &swiz = args.swizzles[index]; swiz = 0; for (unsigned c = 0; c < 4; c++) { switch (arg[c]) { case 'x': case 'X': case 'r': case 'R': swiz |= 0 << (2 * c); break; case 'y': case 'Y': case 'g': case 'G': swiz |= 1 << (2 * c); break; case 'z': case 'Z': case 'b': case 'B': swiz |= 2 << (2 * c); break; case 'w': case 'W': case 'a': case 'A': swiz |= 3 << (2 * c); break; default: LOGE("Invalid swizzle character %c.\n", arg[c]); print_help(); parser.end(); return; } } }); cbs.add("--root-constant-inline-ubo", [&](CLIParser &parser) { args.root_constant_inline_ubo_desc_set = parser.next_uint(); args.root_constant_inline_ubo_binding = parser.next_uint(); args.root_constant_inline_ubo = true; }); cbs.add("--bindless-cbv-as-ssbo", [&](CLIParser &) { args.bindless_cbv_as_ssbo = true; }); cbs.add("--ssbo-uav", [&](CLIParser &) { remapper.ssbo_uav = true; }); cbs.add("--ssbo-srv", [&](CLIParser &) { remapper.ssbo_srv = true; }); cbs.add("--ssbo-rtas", [&](CLIParser &) { remapper.ssbo_rtas = true; }); cbs.add("--input-attachments", [&](CLIParser &) { remapper.input_attachments = true; }); cbs.add("--ssbo-alignment", [&](CLIParser &parser) { args.ssbo_alignment = parser.next_uint(); }); cbs.add("--typed-uav-read-without-format", [&](CLIParser &) { args.typed_uav_read_without_format = true; }); cbs.add("--bindless-typed-buffer-offsets", [&](CLIParser &) { args.bindless_typed_buffer_offsets = true; }); cbs.add("--bindless-offset-buffer-layout", [&](CLIParser &parser) { args.offset_buffer_layout.untyped_offset = parser.next_uint(); args.offset_buffer_layout.typed_offset = parser.next_uint(); args.offset_buffer_layout.stride = parser.next_uint(); }); cbs.add("--entry", [&](CLIParser &parser) { args.entry_point = parser.next_string(); }); cbs.add("--debug-all-entry-points", [&](CLIParser &parser) { args.debug_all_entry_points = true; }); cbs.add("--storage-input-output-16bit", [&](CLIParser &parser) { args.storage_input_output_16bit = true; }); cbs.add("--descriptor-qa", [&](CLIParser &parser) { args.descriptor_qa = true; args.descriptor_qa_set = parser.next_uint(); args.descriptor_qa_binding = parser.next_uint(); args.shader_hash = uint64_t(strtoull(parser.next_string(), nullptr, 16)); }); cbs.add("--instruction-instrumentation", [&](CLIParser &parser) { args.instruction_instrumentation = true; args.instruction_instrumentation_type = dxil_spv_instruction_instrumentation_type(parser.next_uint()); args.instruction_instrumentation_set = parser.next_uint(); args.instruction_instrumentation_binding = parser.next_uint(); args.shader_hash = 
	cbs.add("--instruction-instrumentation", [&](CLIParser &parser) {
		args.instruction_instrumentation = true;
		args.instruction_instrumentation_type = dxil_spv_instruction_instrumentation_type(parser.next_uint());
		args.instruction_instrumentation_set = parser.next_uint();
		args.instruction_instrumentation_binding = parser.next_uint();
		args.shader_hash = uint64_t(strtoull(parser.next_string(), nullptr, 16));
	});
	cbs.add("--min-precision-native-16bit", [&](CLIParser &) { args.min_precision_native_16bit = true; });
	cbs.add("--raw-llvm", [&](CLIParser &) { args.raw_llvm = true; });
	cbs.add("--use-reflection-names", [&](CLIParser &) { args.use_reflection_names = true; });
	cbs.add("--invariant-position", [&](CLIParser &) { args.invariant_position = true; });
	cbs.add("--robust-physical-cbv-load", [&](CLIParser &) { args.robust_physical_cbv_load = true; });
	cbs.add("--allow-arithmetic-relaxed-precision", [&](CLIParser &) { args.allow_arithmetic_relaxed_precision = true; });
	cbs.add("--physical-address-descriptor-indexing", [&](CLIParser &parser) {
		args.physical_address_indexing_stride = parser.next_uint();
		args.physical_address_indexing_offset = parser.next_uint();
	});
	cbs.add("--nvapi", [&](CLIParser &parser) {
		args.nvapi = true;
		args.nvapi_register_index = parser.next_uint();
		args.nvapi_register_space = parser.next_uint();
	});
	cbs.add("--subgroup-partitioned-nv", [&](CLIParser &) { args.subgroup_partitioned_nv = true; });
	cbs.add("--dead-code-eliminate", [&](CLIParser &) { args.dead_code_eliminate = true; });
	cbs.add("--propagate-precise", [&](CLIParser &) { args.propagate_precise = true; });
	cbs.add("--force-precise", [&](CLIParser &) { args.force_precise = true; });
	cbs.add("--opacity-micromap", [&](CLIParser &) { args.opacity_micromap = true; });
	cbs.add("--force-flatten", [&](CLIParser &) { args.force_flatten = true; });
	cbs.add("--force-loop", [&](CLIParser &) { args.force_loop = true; });
	cbs.add("--force-unroll", [&](CLIParser &) { args.force_unroll = true; });
	cbs.add("--force-branch", [&](CLIParser &) { args.force_branch = true; });
	cbs.add("--subgroup-size", [&](CLIParser &parser) {
		args.subgroup_size_minimum = parser.next_uint();
		args.subgroup_size_maximum = parser.next_uint();
	});
	cbs.add("--descriptor-heap-robustness", [&](CLIParser &) { args.descriptor_heap_robustness = true; });
	cbs.add("--no-compute-shader-derivatives", [&](CLIParser &) { args.compute_shader_derivatives = false; });
	cbs.add("--quad-control-maximal-reconvergence", [&](CLIParser &) { args.quad_control_maximal_reconvergence = true; });
	cbs.add("--force-maximal-reconvergence", [&](CLIParser &) { args.force_maximal_reconvergence = true; });
	cbs.add("--raw-access-chains-nv", [&](CLIParser &) { args.raw_access_chains_nv = true; });
	cbs.add("--extended-robustness", [&](CLIParser &) { args.extended_robustness = true; });
	cbs.add("--vkmm", [&](CLIParser &) { args.vkmm = true; });
	cbs.add("--full-wmma", [&](CLIParser &parser) {
		args.wmma_fp8 = parser.next_uint() != 0;
		args.wmma_nv_coopmat2 = parser.next_uint() != 0;
	});
	cbs.add("--shader-quirk", [&](CLIParser &parser) { args.quirks.push_back(dxil_spv_shader_quirk(parser.next_uint())); });
	cbs.add("--non-semantic", [&](CLIParser &) { args.non_semantic = true; });
	cbs.add("--mixed-float-dot-product", [&](CLIParser &) { args.mixed_float_dot_product = true; });
	cbs.add("--meta-descriptor", [&](CLIParser &parser) {
		MetaDescriptor meta = {};
		meta.meta = dxil_spv_meta_descriptor(parser.next_uint());
		meta.kind = dxil_spv_meta_descriptor_kind(parser.next_uint());
		meta.desc_set = parser.next_uint();
		meta.desc_binding = parser.next_uint();
		args.meta_descriptors.push_back(meta);
	});
	cbs.add("--view-instancing", [&](CLIParser &parser) { args.view_instancing = true; });
	cbs.add("--view-instancing-last-pre-rasterization-stage",
	        [&](CLIParser &parser) { args.view_instancing_last_pre_rasterization_stage = true; });
cbs.add("--view-instance-to-viewport-spec-id", [&](CLIParser &parser) { args.view_instance_to_viewport_spec_id = parser.next_uint(); }); cbs.add("--view-index-to-view-instance-spec-id", [&](CLIParser &parser) { args.view_index_to_view_instance_spec_id = parser.next_uint(); }); cbs.error_handler = [] { print_help(); }; cbs.default_handler = [&](const char *arg) { args.input_path = arg; }; CLIParser cli_parser(std::move(cbs), argc - 1, argv + 1); if (!cli_parser.parse()) return EXIT_FAILURE; else if (cli_parser.is_ended_state()) return EXIT_SUCCESS; if (args.input_path.empty()) { LOGE("No input file.\n"); print_help(); return EXIT_FAILURE; } auto binary = read_file(args.input_path.c_str()); if (binary.empty()) { LOGE("Failed to load file: %s\n", args.input_path.c_str()); return EXIT_FAILURE; } dxil_spv_parsed_blob reflection_blob = nullptr; dxil_spv_parsed_blob blob; if (args.raw_llvm) { if (dxil_spv_parse_dxil(binary.data(), binary.size(), &blob) != DXIL_SPV_SUCCESS) { LOGE("Failed to parse raw LLVM blob.\n"); return EXIT_FAILURE; } } else { if (dxil_spv_parse_dxil_blob(binary.data(), binary.size(), &blob) != DXIL_SPV_SUCCESS) { LOGE("Failed to parse blob.\n"); return EXIT_FAILURE; } } if (args.use_reflection_names) { auto result = dxil_spv_parse_reflection_dxil_blob(binary.data(), binary.size(), &reflection_blob); if (result != DXIL_SPV_SUCCESS && result != DXIL_SPV_ERROR_NO_DATA) { LOGE("Failed to parse blob.\n"); return EXIT_FAILURE; } else if (result == DXIL_SPV_ERROR_NO_DATA) { LOGW("No STAT block found in DXIL blob.\n"); reflection_blob = nullptr; } } if (args.dump_module) dxil_spv_parsed_blob_dump_llvm_ir(blob); dxil_spv_converter converter; if (dxil_spv_create_converter_with_reflection(blob, reflection_blob, &converter) != DXIL_SPV_SUCCESS) return EXIT_FAILURE; dxil_spv_converter_set_srv_remapper(converter, remap_srv, &remapper); dxil_spv_converter_set_sampler_remapper(converter, remap_sampler, &remapper); dxil_spv_converter_set_uav_remapper(converter, remap_uav, &remapper); dxil_spv_converter_set_cbv_remapper(converter, remap_cbv, &remapper); dxil_spv_converter_set_vertex_input_remapper(converter, remap_vertex_input, &remapper); dxil_spv_converter_set_stream_output_remapper(converter, remap_stream_output, &remapper); dxil_spv_converter_set_root_constant_word_count(converter, remapper.root_constant_word_count); dxil_spv_converter_set_root_descriptor_count(converter, remapper.root_descriptors.size()); if (local_root_signature) { dxil_spv_converter_add_local_root_constants(converter, 15, 0, 5); dxil_spv_converter_add_local_root_constants(converter, 15, 1, 6); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 1); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 1); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 2); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 2); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 2); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 3, ~0u, 10); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 3, ~0u, 11); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 3, ~0u, 12); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SAMPLER, 15, 3, ~0u, 13); dxil_spv_option_sbt_descriptor_size_log2 
	if (local_root_signature)
	{
		dxil_spv_converter_add_local_root_constants(converter, 15, 0, 5);
		dxil_spv_converter_add_local_root_constants(converter, 15, 1, 6);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 1);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 1);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 3, ~0u, 10);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 3, ~0u, 11);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 3, ~0u, 12);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SAMPLER, 15, 3, ~0u, 13);
		dxil_spv_option_sbt_descriptor_size_log2 desc_size = { { DXIL_SPV_OPTION_SBT_DESCRIPTOR_SIZE_LOG2 }, 6, 5 };
		dxil_spv_converter_add_option(converter, &desc_size.base);
	}

	if (remapper.bindless)
	{
		// Dummy mappings.
		for (uint32_t i = 0; i < 64; i++)
			dxil_spv_converter_add_root_parameter_mapping(converter, i, 4 * i);
	}

	if (args.shader_demote)
	{
		const dxil_spv_option_shader_demote_to_helper helper = { { DXIL_SPV_OPTION_SHADER_DEMOTE_TO_HELPER }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	if (args.shader_i8_dot)
	{
		const dxil_spv_option_shader_i8_dot helper = { { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	{
		const dxil_spv_option_shader_ray_tracing_primitive_culling helper = {
			{ DXIL_SPV_OPTION_SHADER_RAY_TRACING_PRIMITIVE_CULLING }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	if (args.dual_source_blending)
	{
		const dxil_spv_option_dual_source_blending helper = { { DXIL_SPV_OPTION_DUAL_SOURCE_BLENDING }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	const dxil_spv_option_output_swizzle swizzle = { { DXIL_SPV_OPTION_OUTPUT_SWIZZLE },
		                                             args.swizzles.data(), unsigned(args.swizzles.size()) };
	dxil_spv_converter_add_option(converter, &swizzle.base);

	if (args.root_constant_inline_ubo)
	{
		const dxil_spv_option_root_constant_inline_uniform_block inline_block = {
			{ DXIL_SPV_OPTION_ROOT_CONSTANT_INLINE_UNIFORM_BLOCK },
			args.root_constant_inline_ubo_desc_set, args.root_constant_inline_ubo_binding, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &inline_block.base);
	}

	if (args.bindless_cbv_as_ssbo)
	{
		const dxil_spv_option_bindless_cbv_ssbo_emulation cbv = { { DXIL_SPV_OPTION_BINDLESS_CBV_SSBO_EMULATION },
			                                                      DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &cbv.base);
	}

	if (remapper.bindless || !remapper.root_descriptors.empty() || local_root_signature)
	{
		const dxil_spv_option_physical_storage_buffer phys = { { DXIL_SPV_OPTION_PHYSICAL_STORAGE_BUFFER },
			                                                   remapper.bda ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &phys.base);
	}

	{
		dxil_spv_option_ssbo_alignment align = { { DXIL_SPV_OPTION_SSBO_ALIGNMENT }, args.ssbo_alignment };
		dxil_spv_converter_add_option(converter, &align.base);
	}

	{
		dxil_spv_option_typed_uav_read_without_format support = { { DXIL_SPV_OPTION_TYPED_UAV_READ_WITHOUT_FORMAT },
			                                                      args.typed_uav_read_without_format };
		dxil_spv_converter_add_option(converter, &support.base);
	}

	{
		dxil_spv_option_bindless_typed_buffer_offsets offsets = {
			{ DXIL_SPV_OPTION_BINDLESS_TYPED_BUFFER_OFFSETS },
			args.bindless_typed_buffer_offsets ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &offsets.base);
	}

	{
		dxil_spv_option_storage_input_output_16bit storage = {
			{ DXIL_SPV_OPTION_STORAGE_INPUT_OUTPUT_16BIT },
			args.storage_input_output_16bit ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &storage.base);
	}

	{
		const dxil_spv_option_descriptor_qa qa = {
			{ DXIL_SPV_OPTION_DESCRIPTOR_QA },
			args.descriptor_qa ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			DXIL_SPV_DESCRIPTOR_QA_INTERFACE_VERSION,
			args.descriptor_qa_set, args.descriptor_qa_binding,
			args.descriptor_qa_set, args.descriptor_qa_binding + 1,
			args.shader_hash };
		dxil_spv_converter_add_option(converter, &qa.base);
	}
	{
		const dxil_spv_option_instruction_instrumentation inst = {
			{ DXIL_SPV_OPTION_INSTRUCTION_INSTRUMENTATION },
			args.instruction_instrumentation ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			DXIL_SPV_INSTRUCTION_INSTRUMENTATION_INTERFACE_VERSION,
			args.instruction_instrumentation_set, args.instruction_instrumentation_binding,
			args.instruction_instrumentation_set, args.instruction_instrumentation_binding + 1,
			args.shader_hash,
			args.instruction_instrumentation_type,
		};
		dxil_spv_converter_add_option(converter, &inst.base);
	}

	{
		const dxil_spv_option_min_precision_native_16bit minprec = {
			{ DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT },
			args.min_precision_native_16bit ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &minprec.base);
	}

	{
		const dxil_spv_option_invariant_position invariant = { { DXIL_SPV_OPTION_INVARIANT_POSITION },
			                                                   args.invariant_position ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &invariant.base);
	}

	{
		const dxil_spv_option_scalar_block_layout scalar = { { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT },
			                                                 DXIL_SPV_TRUE, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &scalar.base);
	}

	{
		const dxil_spv_option_barycentric_khr bary = { { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &bary.base);
	}

	if (args.robust_physical_cbv_load)
	{
		const dxil_spv_option_robust_physical_cbv_load cbv = { { DXIL_SPV_OPTION_ROBUST_PHYSICAL_CBV_LOAD },
			                                                   DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &cbv.base);
	}

	if (args.allow_arithmetic_relaxed_precision)
	{
		const dxil_spv_option_arithmetic_relaxed_precision relaxed = {
			{ DXIL_SPV_OPTION_ARITHMETIC_RELAXED_PRECISION }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &relaxed.base);
	}

	{
		const dxil_spv_option_physical_address_descriptor_indexing indexing = {
			{ DXIL_SPV_OPTION_PHYSICAL_ADDRESS_DESCRIPTOR_INDEXING },
			args.physical_address_indexing_stride, args.physical_address_indexing_offset };
		dxil_spv_converter_add_option(converter, &indexing.base);
	}

	{
		const dxil_spv_option_denorm_preserve_support denorm = { { DXIL_SPV_OPTION_DENORM_PRESERVE_SUPPORT },
			                                                     DXIL_SPV_TRUE, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &denorm.base);
	}

	{
		const dxil_spv_option_subgroup_partitioned_nv partitioned = {
			{ DXIL_SPV_OPTION_SUBGROUP_PARTITIONED_NV },
			args.subgroup_partitioned_nv ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &partitioned.base);
	}

	{
		const dxil_spv_option_dead_code_eliminate eliminate = {
			{ DXIL_SPV_OPTION_DEAD_CODE_ELIMINATE },
			args.dead_code_eliminate ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &eliminate.base);
	}

	{
		const dxil_spv_option_precise_control precise = {
			{ DXIL_SPV_OPTION_PRECISE_CONTROL },
			args.force_precise ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			args.propagate_precise ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &precise.base);
	}

	{
		const dxil_spv_option_opacity_micromap omm = { { DXIL_SPV_OPTION_OPACITY_MICROMAP },
			                                           args.opacity_micromap ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &omm.base);
	}
	{
		dxil_spv_option_branch_control branch = { { DXIL_SPV_OPTION_BRANCH_CONTROL } };
		branch.force_flatten = args.force_flatten ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_loop = args.force_loop ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_unroll = args.force_unroll ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_branch = args.force_branch ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		dxil_spv_converter_add_option(converter, &branch.base);
	}

	{
		const dxil_spv_option_subgroup_properties props = { { DXIL_SPV_OPTION_SUBGROUP_PROPERTIES },
			                                                args.subgroup_size_minimum, args.subgroup_size_maximum };
		dxil_spv_converter_add_option(converter, &props.base);
	}

	if (args.descriptor_heap_robustness)
	{
		const dxil_spv_option_descriptor_heap_robustness robustness = {
			{ DXIL_SPV_OPTION_DESCRIPTOR_HEAP_ROBUSTNESS }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &robustness.base);
	}

	{
		const dxil_spv_option_compute_shader_derivatives derivs = {
			{ DXIL_SPV_OPTION_COMPUTE_SHADER_DERIVATIVES },
			args.compute_shader_derivatives ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			args.compute_shader_derivatives ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
		};
		dxil_spv_converter_add_option(converter, &derivs.base);

		if (args.compute_shader_derivatives)
		{
			const dxil_spv_option_compute_shader_derivatives_quad quad = {
				{ DXIL_SPV_OPTION_COMPUTE_SHADER_DERIVATIVES_QUAD },
				DXIL_SPV_TRUE,
			};
			dxil_spv_converter_add_option(converter, &quad.base);
		}
	}

	{
		dxil_spv_option_quad_control_reconvergence reconv = { { DXIL_SPV_OPTION_QUAD_CONTROL_RECONVERGENCE } };
		reconv.force_maximal_reconvergence = args.force_maximal_reconvergence ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		reconv.supports_maximal_reconvergence =
		    (args.quad_control_maximal_reconvergence || args.force_maximal_reconvergence) ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		reconv.supports_quad_control = args.quad_control_maximal_reconvergence;
		dxil_spv_converter_add_option(converter, &reconv.base);
	}

	if (args.raw_access_chains_nv)
	{
		const dxil_spv_option_raw_access_chains_nv chain = { { DXIL_SPV_OPTION_RAW_ACCESS_CHAINS_NV }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &chain.base);
	}

	if (args.extended_robustness)
	{
		dxil_spv_option_extended_robustness robust = { { DXIL_SPV_OPTION_EXTENDED_ROBUSTNESS } };
		robust.robust_constant_lut = DXIL_SPV_TRUE;
		robust.robust_alloca = DXIL_SPV_TRUE;
		dxil_spv_converter_add_option(converter, &robust.base);
	}

	if (args.vkmm)
	{
		dxil_spv_option_vulkan_memory_model vkmm = { { DXIL_SPV_OPTION_VULKAN_MEMORY_MODEL }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &vkmm.base);
	}

	if (args.wmma_fp8 || args.wmma_nv_coopmat2)
	{
		dxil_spv_option_float8_support wmma = { { DXIL_SPV_OPTION_FLOAT8_SUPPORT },
			                                    args.wmma_fp8, args.wmma_nv_coopmat2 };
		dxil_spv_converter_add_option(converter, &wmma.base);
	}

	if (args.nvapi)
	{
		dxil_spv_option_nvapi extn = { { DXIL_SPV_OPTION_NVAPI } };
		extn.enabled = DXIL_SPV_TRUE;
		extn.register_index = args.nvapi_register_index;
		extn.register_space = args.nvapi_register_space;
		dxil_spv_converter_add_option(converter, &extn.base);
	}

	if (args.non_semantic)
	{
		dxil_spv_option_extended_non_semantic sem = { { DXIL_SPV_OPTION_EXTENDED_NON_SEMANTIC }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &sem.base);
	}

	if (args.mixed_float_dot_product)
	{
		dxil_spv_option_mixed_float_dot_product mixed = { { DXIL_SPV_OPTION_MIXED_FLOAT_DOT_PRODUCT }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &mixed.base);
	}

	if (args.view_instancing)
	{
		dxil_spv_option_view_instancing inst = { { DXIL_SPV_OPTION_VIEW_INSTANCING } };
		inst.enabled = DXIL_SPV_TRUE;
		inst.last_pre_rasterization_stage = args.view_instancing_last_pre_rasterization_stage;
		inst.view_index_to_view_instance_spec_id = args.view_index_to_view_instance_spec_id;
		inst.view_instance_to_viewport_spec_id = args.view_instance_to_viewport_spec_id;
		dxil_spv_converter_add_option(converter, &inst.base);
	}
	for (auto &quirk : args.quirks)
	{
		dxil_spv_option_shader_quirk helper = { { DXIL_SPV_OPTION_SHADER_QUIRK }, quirk };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	for (auto &meta : args.meta_descriptors)
		dxil_spv_converter_set_meta_descriptor(converter, meta.meta, meta.kind, meta.desc_set, meta.desc_binding);

	dxil_spv_converter_add_option(converter, &args.offset_buffer_layout.base);

	unsigned num_entry_points = 1;
	if (args.debug_all_entry_points)
		dxil_spv_parsed_blob_get_num_entry_points(blob, &num_entry_points);

	std::string final_output;

	for (unsigned entry_point = 0; entry_point < num_entry_points; entry_point++)
	{
		const char *demangled_entry = nullptr;
		if (args.debug_all_entry_points)
		{
			dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, entry_point, &demangled_entry);
			dxil_spv_converter_set_entry_point(converter, demangled_entry);
		}
		else if (!args.entry_point.empty())
			dxil_spv_converter_set_entry_point(converter, args.entry_point.c_str());

		if (dxil_spv_converter_run(converter) != DXIL_SPV_SUCCESS)
		{
			LOGE("Failed to convert DXIL to SPIR-V.\n");
			return EXIT_FAILURE;
		}

		dxil_spv_compiled_spirv compiled;
		if (dxil_spv_converter_get_compiled_spirv(converter, &compiled) != DXIL_SPV_SUCCESS)
			return EXIT_FAILURE;

		unsigned heuristic_min_wave_size = 0;
		unsigned heuristic_max_wave_size = 0;
		unsigned wave_size_min = 0;
		unsigned wave_size_max = 0;
		unsigned wave_size_preferred = 0;
		dxil_spv_converter_get_compute_wave_size_range(converter, &wave_size_min, &wave_size_max, &wave_size_preferred);
		dxil_spv_converter_get_compute_heuristic_min_wave_size(converter, &heuristic_min_wave_size);
		dxil_spv_converter_get_compute_heuristic_max_wave_size(converter, &heuristic_max_wave_size);

		if (args.validate)
		{
			if (!validate_spirv(compiled.data, compiled.size))
			{
				LOGE("Failed to validate SPIR-V.\n");
				return EXIT_FAILURE;
			}
		}
		std::string spirv_asm_string;
		if (args.emit_asm || (!args.glsl && args.output_path.empty()))
		{
			if (wave_size_min)
			{
				spirv_asm_string += "// WaveSize(";
				spirv_asm_string += std::to_string(wave_size_min);
				if (wave_size_max || wave_size_preferred)
					spirv_asm_string += "," + std::to_string(wave_size_max ? wave_size_max : wave_size_min);
				if (wave_size_preferred)
					spirv_asm_string += "," + std::to_string(wave_size_preferred);
				spirv_asm_string += ")\n";
			}

			if (heuristic_min_wave_size)
			{
				spirv_asm_string += "// HeuristicWaveSizeMin(";
				spirv_asm_string += std::to_string(heuristic_min_wave_size);
				spirv_asm_string += ")\n";
			}

			if (heuristic_max_wave_size)
			{
				spirv_asm_string += "// HeuristicWaveSize(";
				spirv_asm_string += std::to_string(heuristic_max_wave_size);
				spirv_asm_string += ")\n";
			}

			dxil_spv_bool compat;
			if (dxil_spv_converter_is_multiview_compatible(converter, &compat) == DXIL_SPV_SUCCESS && compat)
				spirv_asm_string += "// MultiviewCompatible\n";

			if (demangled_entry && !args.glsl)
			{
				spirv_asm_string += "// ========== ";
				spirv_asm_string += demangled_entry;
				spirv_asm_string += " ==========\n";
			}
			spirv_asm_string += convert_to_asm(compiled.data, compiled.size);
			if (demangled_entry && !args.glsl)
				spirv_asm_string += "// ==================\n";
		}

		if (args.glsl)
		{
			auto compiled_glsl = convert_to_glsl(compiled.data, compiled.size);
			if (compiled_glsl.empty())
			{
				LOGE("Failed to convert to GLSL.\n");
				return EXIT_FAILURE;
			}

			const char *warn = dxil_spv_converter_get_analysis_warnings(converter);
			if (warn && *warn != '\0')
			{
				compiled_glsl += "/* WARNINGS:\n";
				compiled_glsl += warn;
				compiled_glsl += "*/\n\n";
			}

			if (!spirv_asm_string.empty())
			{
				compiled_glsl += "\n#if 0\n";
				compiled_glsl += "// SPIR-V disassembly\n";
				compiled_glsl += spirv_asm_string;
				compiled_glsl += "#endif";
			}

			std::string output;
			if (demangled_entry)
			{
				output += "// ========= ";
				output += demangled_entry;
				output += " =========\n";
				output += compiled_glsl;
				output += "\n// =================\n";
			}
			else
				output = std::move(compiled_glsl);

			final_output += output;
		}
		else if (args.emit_asm || args.output_path.empty())
		{
			final_output += spirv_asm_string;
		}
		else
		{
			if (demangled_entry)
			{
				LOGE("Cannot emit binary output when using debug-all-entry-points.\n");
				return EXIT_FAILURE;
			}

			FILE *file = fopen(args.output_path.c_str(), "wb");
			if (file)
			{
				if (fwrite(compiled.data, 1, compiled.size, file) != compiled.size)
				{
					LOGE("Failed to write SPIR-V.\n");
					return EXIT_FAILURE;
				}
				fclose(file);
			}
			else
				LOGE("Failed to open %s.\n", args.output_path.c_str());
		}
	}

	// Dump debug output of RDAT objects if we have them.
	if (args.glsl || args.emit_asm)
	{
		unsigned num_subobjects = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
		if (num_subobjects > 0)
		{
			final_output += "\n#if 0\n==== RDAT ====\n";
			for (unsigned i = 0; i < num_subobjects; i++)
			{
				dxil_spv_rdat_subobject obj;
				dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &obj);
				switch (obj.kind)
				{
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
					final_output += "StateObjectConfig ";
					final_output += obj.subobject_name;
					final_output += " = { flags = ";
					final_output += std::to_string(obj.args[0]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
					final_output += "ShaderConfig ";
					final_output += obj.subobject_name;
					final_output += " = { maxPayloadSize = ";
					final_output += std::to_string(obj.args[0]);
					final_output += ", maxAttributeSize = ";
					final_output += std::to_string(obj.args[1]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
					final_output += "RaytracingPipelineConfig1 ";
					final_output += obj.subobject_name;
					final_output += " = { maxRecursion = ";
					final_output += std::to_string(obj.args[0]);
					final_output += ", flags = ";
					final_output += std::to_string(obj.args[1]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
					final_output += obj.kind == DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE ?
					                "GlobalRootSignature " : "LocalRootSignature ";
					final_output += obj.subobject_name;
					final_output += " = { ";
					final_output += std::to_string(obj.payload_size);
					final_output += " bytes };\n";
					break;
"TriangleHitGroup " : "ProceduralHitGroup "; final_output += obj.subobject_name; assert(obj.num_exports == 3); final_output += " = { ahit = \""; final_output += obj.exports[0]; final_output += "\", chit = \""; final_output += obj.exports[1]; final_output += "\", intersection = \""; final_output += obj.exports[2]; final_output += "\" };\n"; break; case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION: final_output += "SubobjectToExportsAssociation "; final_output += obj.subobject_name; final_output += " = { "; assert(obj.num_exports >= 1); final_output += obj.exports[0]; final_output += ", { "; for (unsigned j = 1; j < obj.num_exports; j++) { final_output += obj.exports[j]; if (j + 1 < obj.num_exports) final_output += ", "; } final_output += " } };\n"; break; default: break; } } final_output += "============\n#endif"; } } if (args.output_path.empty()) { printf("%s\n", final_output.c_str()); } else if (!final_output.empty()) { FILE *file = fopen(args.output_path.c_str(), "w"); if (!file) { LOGE("Failed to open %s for writing.\n", args.output_path.c_str()); return EXIT_FAILURE; } fprintf(file, "%s\n", final_output.c_str()); fclose(file); } dxil_spv_converter_free(converter); dxil_spv_parsed_blob_free(blob); if (reflection_blob) dxil_spv_parsed_blob_free(reflection_blob); dxil_spv_end_thread_allocator_context(); return EXIT_SUCCESS; } ================================================ FILE: dxil_spirv_c.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define DXIL_SPV_ENABLE_EXPERIMENTAL_WORKGRAPHS
#define DXIL_SPV_ENABLE_EXPERIMENTAL_MULTIVIEW

#include "thread_local_allocator.hpp"
#include "dxil_spirv_c.h"
#include "dxil_converter.hpp"
#include "dxil_parser.hpp"
#include "llvm_bitcode_parser.hpp"
#include "logging.hpp"
#include "spirv_module.hpp"
#include <memory>
#include <new>

using namespace dxil_spv;

void dxil_spv_get_version(unsigned *major, unsigned *minor, unsigned *patch)
{
	*major = DXIL_SPV_API_VERSION_MAJOR;
	*minor = DXIL_SPV_API_VERSION_MINOR;
	*patch = DXIL_SPV_API_VERSION_PATCH;
}

struct dxil_spv_parsed_blob_s
{
	LLVMBCParser bc;
#ifdef HAVE_LLVMBC
	String disasm;
#else
	std::string disasm;
#endif
	Vector<uint8_t> dxil_blob;
	Vector<RDATSubobject> rdat_subobjects;

	struct EntryPoint
	{
		String mangled;
		String demangled;
		NodeInputData node_input;
		Vector<NodeOutputData> node_outputs;
	};
	Vector<EntryPoint> entry_points;
};

struct Remapper : ResourceRemappingInterface
{
	static void copy_buffer_binding(VulkanBinding &vk_binding, const dxil_spv_vulkan_binding &c_vk_binding)
	{
		vk_binding.descriptor_set = c_vk_binding.set;
		vk_binding.binding = c_vk_binding.binding;
		vk_binding.root_constant_index = c_vk_binding.root_constant_index;
		vk_binding.bindless.use_heap = bool(c_vk_binding.bindless.use_heap);
		vk_binding.bindless.heap_root_offset = c_vk_binding.bindless.heap_root_offset;
		vk_binding.descriptor_type = static_cast<VulkanDescriptorType>(c_vk_binding.descriptor_type);
	}

	bool remap_srv(const D3DBinding &binding, VulkanSRVBinding &vk_binding) override
	{
		if (srv_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_srv_vulkan_binding c_vk_binding = {};
			if (srv_remapper(srv_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding.buffer_binding, c_vk_binding.buffer_binding);
				copy_buffer_binding(vk_binding.offset_binding, c_vk_binding.offset_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer_binding.bindless.use_heap = false;
			vk_binding.buffer_binding.descriptor_set = binding.register_space;
			vk_binding.buffer_binding.binding = binding.register_index;
			vk_binding.buffer_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.offset_binding = {};
			return true;
		}
	}

	bool remap_sampler(const D3DBinding &binding, VulkanBinding &vk_binding) override
	{
		if (sampler_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_vulkan_binding c_vk_binding = {};
			if (sampler_remapper(sampler_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding, c_vk_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.bindless.use_heap = false;
			vk_binding.descriptor_set = binding.register_space;
			vk_binding.binding = binding.register_index;
			vk_binding.descriptor_type = VulkanDescriptorType::Identity;
			return true;
		}
	}

	bool remap_uav(const D3DUAVBinding &binding, VulkanUAVBinding &vk_binding) override
	{
		if (uav_remapper)
		{
			const dxil_spv_uav_d3d_binding c_binding = {
				{
					static_cast<dxil_spv_shader_stage>(binding.binding.stage),
					static_cast<dxil_spv_resource_kind>(binding.binding.kind),
					binding.binding.resource_index,
					binding.binding.register_space,
					binding.binding.register_index,
					binding.binding.range_size,
					binding.binding.alignment,
				},
				binding.counter ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			};

			dxil_spv_uav_vulkan_binding c_vk_binding = {};
			if (uav_remapper(uav_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding.buffer_binding, c_vk_binding.buffer_binding);
				copy_buffer_binding(vk_binding.counter_binding, c_vk_binding.counter_binding);
				copy_buffer_binding(vk_binding.offset_binding, c_vk_binding.offset_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer_binding.bindless.use_heap = false;
			vk_binding.counter_binding.bindless.use_heap = false;
			vk_binding.buffer_binding.descriptor_set = binding.binding.register_space;
			vk_binding.buffer_binding.binding = binding.binding.register_index;
			vk_binding.counter_binding.descriptor_set = binding.binding.register_space + 1;
			vk_binding.counter_binding.binding = binding.binding.register_index;
			vk_binding.buffer_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.counter_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.offset_binding = {};
			return true;
		}
	}

	bool remap_cbv(const D3DBinding &binding, VulkanCBVBinding &vk_binding) override
	{
		if (cbv_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_cbv_vulkan_binding c_vk_binding = {};
			if (cbv_remapper(cbv_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				vk_binding.push_constant = c_vk_binding.push_constant;
				if (vk_binding.push_constant)
					vk_binding.push.offset_in_words = c_vk_binding.vulkan.push_constant.offset_in_words;
				else
					copy_buffer_binding(vk_binding.buffer, c_vk_binding.vulkan.uniform_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer.bindless.use_heap = false;
			vk_binding.buffer.descriptor_set = binding.register_space;
			vk_binding.buffer.binding = binding.register_index;
			vk_binding.buffer.descriptor_type = VulkanDescriptorType::Identity;
			return true;
		}
	}

	bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
	{
		dxil_spv_d3d_vertex_input c_input = { d3d_input.semantic, d3d_input.semantic_index,
		                                      d3d_input.start_row, d3d_input.rows };
		dxil_spv_vulkan_vertex_input c_vk_input = {};

		if (input_remapper)
		{
			if (input_remapper(input_userdata, &c_input, &c_vk_input) == DXIL_SPV_TRUE)
			{
				vk_input.location = c_vk_input.location;
				vk_input.component = 0;
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_input.location = d3d_input.start_row;
			return true;
		}
	}

	bool remap_stream_output(const D3DStreamOutput &d3d_output, VulkanStreamOutput &vk_output) override
	{
		dxil_spv_d3d_stream_output c_output = { d3d_output.semantic, d3d_output.semantic_index };
		dxil_spv_vulkan_stream_output c_vk_output = {};

		if (output_remapper)
		{
			if (output_remapper(output_userdata, &c_output, &c_vk_output) == DXIL_SPV_TRUE)
			{
				vk_output.enable = bool(c_vk_output.enable);
				vk_output.offset = c_vk_output.offset;
				vk_output.stride = c_vk_output.stride;
				vk_output.buffer_index = c_vk_output.buffer_index;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
	{
		dxil_spv_d3d_shader_stage_io c_input = { d3d_input.semantic, d3d_input.semantic_index };
		dxil_spv_vulkan_shader_stage_io c_vk_input = { vk_input.location, vk_input.component, vk_input.flags };

		if (stage_input_remapper)
		{
			if (stage_input_remapper(stage_input_userdata, &c_input, &c_vk_input) == DXIL_SPV_TRUE)
			{
				vk_input.location = c_vk_input.location;
				vk_input.component = c_vk_input.component;
				vk_input.flags = c_vk_input.flags;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	bool has_nontrivial_stage_input_remapping() override
	{
		return stage_input_remapper != nullptr;
	}

	bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) override
	{
		dxil_spv_d3d_shader_stage_io c_output = { d3d_output.semantic, d3d_output.semantic_index };
		dxil_spv_vulkan_shader_stage_io c_vk_output = { vk_output.location, vk_output.component, vk_output.flags };

		if (stage_output_remapper)
		{
			if (stage_output_remapper(stage_output_userdata, &c_output, &c_vk_output) == DXIL_SPV_TRUE)
			{
				vk_output.location = c_vk_output.location;
				vk_output.component = c_vk_output.component;
				vk_output.flags = c_vk_output.flags;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	unsigned get_root_constant_word_count() override
	{
		return root_constant_word_count;
	}

	unsigned get_root_descriptor_count() override
	{
		return root_descriptor_count;
	}

	dxil_spv_srv_remapper_cb srv_remapper = nullptr;
	void *srv_userdata = nullptr;
	dxil_spv_sampler_remapper_cb sampler_remapper = nullptr;
	void *sampler_userdata = nullptr;
	dxil_spv_uav_remapper_cb uav_remapper = nullptr;
	void *uav_userdata = nullptr;
	dxil_spv_cbv_remapper_cb cbv_remapper = nullptr;
	void *cbv_userdata = nullptr;
	dxil_spv_vertex_input_remapper_cb input_remapper = nullptr;
	void *input_userdata = nullptr;
	dxil_spv_stream_output_remapper_cb output_remapper = nullptr;
	void *output_userdata = nullptr;
	dxil_spv_shader_stage_io_remapper_cb stage_input_remapper = nullptr;
	void *stage_input_userdata = nullptr;
	dxil_spv_shader_stage_io_remapper_cb stage_output_remapper = nullptr;
	void *stage_output_userdata = nullptr;
	unsigned root_constant_word_count = 0;
	unsigned root_descriptor_count = 0;
};

enum class LocalRootParameterType
{
	Constants,
	Descriptor,
	Table
};

struct LocalConstants
{
	unsigned register_space;
	unsigned register_index;
	unsigned num_words;
};

struct LocalDescriptor
{
	ResourceClass resource_class;
	unsigned register_space;
	unsigned register_index;
};

struct LocalRootParameter
{
	LocalRootParameterType type;
	LocalConstants local_constants;
	LocalDescriptor local_descriptor;
	Vector<DescriptorTableEntry> table_entries;
};

struct dxil_spv_converter_s
{
	dxil_spv_converter_s(LLVMBCParser &bc_parser_, LLVMBCParser *bc_reflection_parser_)
	    : bc_parser(bc_parser_), bc_reflection_parser(bc_reflection_parser_)
	{
	}

	LLVMBCParser &bc_parser;
	LLVMBCParser *bc_reflection_parser;
	Vector<uint32_t> spirv;
	String entry_point;
	String compiled_entry_point;
	String analysis_warnings;
	Remapper remapper;
	Vector<LocalRootParameter> local_root_parameters;
	Vector<std::unique_ptr<OptionBase>> options;

	struct MetaDescriptorMapping
	{
		MetaDescriptor meta;
		MetaDescriptorKind kind;
		uint32_t desc_set;
		uint32_t desc_binding;
	};
	Vector<MetaDescriptorMapping> meta_mappings;

	Vector<DescriptorTableEntry> local_entries;
	bool active_table = false;
	bool uses_subgroup_size = false;
	bool is_multiview_compatible = false;
	uint32_t workgroup_size[3] = {};
	uint32_t patch_vertex_count = 0;
	uint32_t patch_location_offset = UINT32_MAX;
	uint32_t wave_size_min = 0;
	uint32_t wave_size_max = 0;
	uint32_t wave_size_preferred = 0;
	uint32_t heuristic_min_wave_size = 0;
	uint32_t heuristic_max_wave_size = 0;
	Vector<std::pair<uint32_t, uint32_t>> root_parameter_mappings;
	Vector<String> non_semantic_debug_info;
	bool shader_feature_used[unsigned(ShaderFeature::Count)] = {};
};
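// Sketch of a client-side SRV remapper compatible with the callback hooks in
// Remapper above (illustrative only; my_srv_remapper is hypothetical, and a
// real client would apply its own root signature layout rather than this
// identity-style mapping):
#if 0
static dxil_spv_bool my_srv_remapper(void *userdata, const dxil_spv_d3d_binding *d3d,
                                     dxil_spv_srv_vulkan_binding *vk)
{
	// Place t# registers in descriptor set 1, keyed by register index.
	vk->buffer_binding.set = 1;
	vk->buffer_binding.binding = d3d->register_index;
	return DXIL_SPV_TRUE;
}
#endif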
dxil_spv_result dxil_spv_parse_dxil_blob(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	DXILContainerParser parser;
	if (!parser.parse_container(data, size, false))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	parsed->dxil_blob = std::move(parser.get_blob());
	parsed->rdat_subobjects = std::move(parser.get_rdat_subobjects());

	bool success;
	if (parser.is_dxbc_binary())
		success = parsed->bc.parseDXBCBinary(data, size);
	else
		success = parsed->bc.parse(parsed->dxil_blob.data(), parsed->dxil_blob.size());

	if (!success)
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	auto names = Converter::get_entry_points(parsed->bc);
	for (auto &name : names)
	{
		parsed->entry_points.push_back({ name, demangle_entry_point(name),
		                                 Converter::get_node_input(parsed->bc, name.c_str()),
		                                 Converter::get_node_outputs(parsed->bc, name.c_str()) });
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parse_reflection_dxil_blob(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	DXILContainerParser parser;
	if (!parser.parse_container(data, size, true))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	if (parser.get_blob().empty())
	{
		delete parsed;
		return DXIL_SPV_ERROR_NO_DATA;
	}

	parsed->dxil_blob = std::move(parser.get_blob());
	if (!parsed->bc.parse(parsed->dxil_blob.data(), parsed->dxil_blob.size()))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parse_dxil(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	if (!parsed->bc.parse(data, size))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	auto names = Converter::get_entry_points(parsed->bc);
	for (auto &name : names)
	{
		parsed->entry_points.push_back({ name, demangle_entry_point(name),
		                                 Converter::get_node_input(parsed->bc, name.c_str()),
		                                 Converter::get_node_outputs(parsed->bc, name.c_str()) });
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_parsed_blob_dump_llvm_ir(dxil_spv_parsed_blob blob)
{
	auto &module = blob->bc.get_module();
#ifdef HAVE_LLVMBC
	String str;
	if (llvm::disassemble(module, str))
		fprintf(stderr, "%s\n", str.c_str());
	else
		fprintf(stderr, "Failed to disassemble LLVM IR!\n");
#else
	module.print(llvm::errs(), nullptr);
#endif
}

dxil_spv_result dxil_spv_parsed_blob_get_disassembled_ir(dxil_spv_parsed_blob blob, const char **str)
{
	blob->disasm.clear();
	auto *module = &blob->bc.get_module();
#ifdef HAVE_LLVMBC
	if (!llvm::disassemble(*module, blob->disasm))
		return DXIL_SPV_ERROR_GENERIC;
#else
	llvm::raw_string_ostream ostr(blob->disasm);
	module->print(ostr, nullptr);
#endif
	*str = blob->disasm.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_raw_ir(dxil_spv_parsed_blob blob, const void **data, size_t *size)
{
	if (blob->dxil_blob.empty())
		return DXIL_SPV_ERROR_GENERIC;
	*data = blob->dxil_blob.data();
	*size = blob->dxil_blob.size();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_shader_stage dxil_spv_parsed_blob_get_shader_stage(dxil_spv_parsed_blob blob)
{
	return static_cast<dxil_spv_shader_stage>(Converter::get_shader_stage(blob->bc));
}

dxil_spv_shader_stage dxil_spv_parsed_blob_get_shader_stage_for_entry(dxil_spv_parsed_blob blob, const char *entry)
{
	return static_cast<dxil_spv_shader_stage>(Converter::get_shader_stage(blob->bc, entry));
}
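// Sketch of a parse + disassemble round trip using the functions above
// (illustrative; data/size and error handling are the caller's concern):
#if 0
dxil_spv_parsed_blob blob = nullptr;
if (dxil_spv_parse_dxil_blob(data, size, &blob) == DXIL_SPV_SUCCESS)
{
	const char *ir = nullptr;
	if (dxil_spv_parsed_blob_get_disassembled_ir(blob, &ir) == DXIL_SPV_SUCCESS)
		fprintf(stderr, "%s\n", ir); // Valid until the blob is freed or re-disassembled.
	dxil_spv_parsed_blob_free(blob);
}
#endif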
dxil_spv_result dxil_spv_parsed_blob_get_entry_index_by_name(dxil_spv_parsed_blob blob, const char *entry, unsigned *index)
{
	for (size_t i = 0, n = blob->entry_points.size(); i < n; i++)
	{
		if (blob->entry_points[i].demangled == entry || blob->entry_points[i].mangled == entry)
		{
			*index = unsigned(i);
			return DXIL_SPV_SUCCESS;
		}
	}

	return DXIL_SPV_ERROR_GENERIC;
}

dxil_spv_result dxil_spv_parsed_blob_get_num_entry_points(dxil_spv_parsed_blob blob, unsigned *count)
{
	*count = unsigned(blob->entry_points.size());
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_name(dxil_spv_parsed_blob blob, unsigned index, const char **mangled_entry)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*mangled_entry = blob->entry_points[index].mangled.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_demangled_name(dxil_spv_parsed_blob blob, unsigned index, const char **demangled_entry)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*demangled_entry = blob->entry_points[index].demangled.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_node_input(
		dxil_spv_parsed_blob blob, unsigned index, dxil_spv_node_input_data *data)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;

	auto &input = blob->entry_points[index].node_input;
	data->node_id = input.node_id.c_str();
	data->payload_stride = input.payload_stride;
	data->launch_type = dxil_spv_node_launch_type(input.launch_type);
	data->node_array_index = input.node_array_index;
	data->dispatch_grid_offset = input.grid_buffer.offset;
	data->dispatch_grid_type_bits = input.grid_buffer.component_type == DXIL::ComponentType::U32 ? 32 : 16;
	data->dispatch_grid_components = input.grid_buffer.count;

	for (int i = 0; i < 3; i++)
	{
		data->broadcast_grid[i] = input.broadcast_grid[i];
		data->thread_group_size_spec_id[i] = input.thread_group_size_spec_id[i];
		data->max_broadcast_grid_spec_id[i] = input.max_broadcast_grid_spec_id[i];
	}

	data->recursion_factor = input.recursion_factor;
	data->coalesce_factor = input.coalesce_factor;
	data->node_share_input_id = input.node_share_input_id.c_str();
	data->node_share_input_array_index = input.node_share_input_array_index;
	data->local_root_arguments_table_index = input.local_root_arguments_table_index;
	data->is_indirect_bda_stride_program_entry_spec_id = input.is_indirect_bda_stride_program_entry_spec_id;
	data->is_entry_point_spec_id = input.is_entry_point_spec_id;
	data->dispatch_grid_is_upper_bound = input.dispatch_grid_is_upper_bound ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	data->dispatch_grid_is_upper_bound_spec_id = input.dispatch_grid_is_upper_bound_spec_id;
	data->is_static_broadcast_node_spec_id = input.is_static_broadcast_node_spec_id;
	data->node_track_rw_input_sharing = input.node_track_rw_input_sharing ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	data->is_program_entry = input.is_program_entry ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	return DXIL_SPV_SUCCESS;
}
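// Sketch of enumerating the entry points in a parsed blob using the
// functions above (illustrative; assumes blob was parsed successfully):
#if 0
unsigned count = 0;
dxil_spv_parsed_blob_get_num_entry_points(blob, &count);
for (unsigned i = 0; i < count; i++)
{
	const char *mangled = nullptr, *demangled = nullptr;
	dxil_spv_parsed_blob_get_entry_point_name(blob, i, &mangled);
	dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, i, &demangled);
	fprintf(stderr, "Entry %u: %s (%s)\n", i, demangled, mangled);
}
#endif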
dxil_spv_result dxil_spv_parsed_blob_get_entry_point_num_node_outputs(
		dxil_spv_parsed_blob blob, unsigned index, unsigned *num_outputs)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*num_outputs = unsigned(blob->entry_points[index].node_outputs.size());
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_node_output(
		dxil_spv_parsed_blob blob, unsigned index, unsigned output_index, dxil_spv_node_output_data *data)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	auto &entry = blob->entry_points[index];
	if (output_index >= entry.node_outputs.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;

	auto &output = entry.node_outputs[output_index];
	data->node_id = output.node_id.c_str();
	data->node_array_index = output.node_array_index;
	data->node_array_size = output.node_array_size;
	data->sparse_array = output.sparse_array;
	data->max_records = output.max_records;
	data->node_index_spec_constant_id = output.node_index_spec_constant_id;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_scan_resources(dxil_spv_parsed_blob blob,
                                                    dxil_spv_srv_remapper_cb srv_remapper,
                                                    dxil_spv_sampler_remapper_cb sampler_remapper,
                                                    dxil_spv_cbv_remapper_cb cbv_remapper,
                                                    dxil_spv_uav_remapper_cb uav_remapper,
                                                    void *userdata)
{
	Remapper remapper;
	remapper.srv_remapper = srv_remapper;
	remapper.srv_userdata = userdata;
	remapper.sampler_remapper = sampler_remapper;
	remapper.sampler_userdata = userdata;
	remapper.cbv_remapper = cbv_remapper;
	remapper.cbv_userdata = userdata;
	remapper.uav_remapper = uav_remapper;
	remapper.uav_userdata = userdata;
	Converter::scan_resources(&remapper, blob->bc);
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_parsed_blob_free(dxil_spv_parsed_blob blob)
{
	delete blob;
}
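// Sketch of a resource pre-scan: the same remapper callbacks used during
// conversion can be invoked up front to discover D3D bindings without
// generating SPIR-V. Unset callbacks fall back to the identity mapping in
// Remapper above (illustrative; my_srv_remapper and my_state are
// hypothetical client code):
#if 0
dxil_spv_parsed_blob_scan_resources(blob, my_srv_remapper, nullptr, nullptr, nullptr, &my_state);
#endif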
dxil_spv_result dxil_spv_create_converter_with_reflection(dxil_spv_parsed_blob blob,
                                                          dxil_spv_parsed_blob reflection_blob,
                                                          dxil_spv_converter *converter)
{
	auto *conv = new (std::nothrow) dxil_spv_converter_s(blob->bc, reflection_blob ? &reflection_blob->bc : nullptr);
	if (!conv)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;
	*converter = conv;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_create_converter(dxil_spv_parsed_blob blob, dxil_spv_converter *converter)
{
	return dxil_spv_create_converter_with_reflection(blob, nullptr, converter);
}

void dxil_spv_converter_free(dxil_spv_converter converter)
{
	delete converter;
}

void dxil_spv_converter_set_entry_point(dxil_spv_converter converter, const char *entry_point)
{
	if (entry_point)
		converter->entry_point = entry_point;
	else
		converter->entry_point.clear();
}
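// Minimal conversion sketch built only from the entry points above and
// dxil_spv_converter_run() / dxil_spv_converter_get_compiled_spirv() below
// (illustrative; "main" is an example entry name, error handling elided):
#if 0
dxil_spv_converter conv = nullptr;
dxil_spv_create_converter(blob, &conv);
dxil_spv_converter_set_entry_point(conv, "main");
if (dxil_spv_converter_run(conv) == DXIL_SPV_SUCCESS)
{
	dxil_spv_compiled_spirv spirv = {};
	dxil_spv_converter_get_compiled_spirv(conv, &spirv);
	// spirv.data / spirv.size (in bytes) remain valid until the converter is freed.
}
dxil_spv_converter_free(conv);
#endif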
dxil_spv_result dxil_spv_converter_run(dxil_spv_converter converter)
{
	SPIRVModule module;
	Converter dxil_converter(converter->bc_parser, converter->bc_reflection_parser, module);

	if (!converter->entry_point.empty())
		dxil_converter.set_entry_point(converter->entry_point.c_str());

	dxil_converter.set_resource_remapping_interface(&converter->remapper);
	for (auto &opt : converter->options)
		dxil_converter.add_option(*opt);

	if (converter->patch_location_offset != UINT32_MAX)
		dxil_converter.set_patch_location_offset(converter->patch_location_offset);

	for (auto &mapping : converter->root_parameter_mappings)
		dxil_converter.add_root_parameter_mapping(mapping.first, mapping.second);
	for (auto &info : converter->non_semantic_debug_info)
		dxil_converter.add_non_semantic_debug_info(info);
	for (auto &mapping : converter->meta_mappings)
		dxil_converter.set_meta_descriptor(mapping.meta, mapping.kind, mapping.desc_set, mapping.desc_binding);

	for (auto &local_param : converter->local_root_parameters)
	{
		switch (local_param.type)
		{
		case LocalRootParameterType::Constants:
			dxil_converter.add_local_root_constants(local_param.local_constants.register_space,
			                                        local_param.local_constants.register_index,
			                                        local_param.local_constants.num_words);
			break;

		case LocalRootParameterType::Descriptor:
			dxil_converter.add_local_root_descriptor(local_param.local_descriptor.resource_class,
			                                         local_param.local_descriptor.register_space,
			                                         local_param.local_descriptor.register_index);
			break;

		case LocalRootParameterType::Table:
			dxil_converter.add_local_root_descriptor_table(local_param.table_entries);
			break;
		}
	}

	auto entry_point = dxil_converter.convert_entry_point();
	if (entry_point.entry.entry == nullptr)
	{
		LOGE("Failed to convert function.\n");
		return DXIL_SPV_ERROR_GENERIC;
	}

	{
		dxil_spv::CFGStructurizer structurizer(entry_point.entry.entry, *entry_point.node_pool, module);
		module.set_entry_build_point(entry_point.entry.func);
		if (entry_point.entry.is_structured)
			structurizer.run_trivial();
		else
			structurizer.run();
		module.emit_entry_point_function_body(structurizer);
	}

	for (auto &leaf : entry_point.leaf_functions)
	{
		if (!leaf.entry)
		{
			LOGE("Leaf function is nullptr!\n");
			return DXIL_SPV_ERROR_GENERIC;
		}

		dxil_spv::CFGStructurizer structurizer(leaf.entry, *entry_point.node_pool, module);
		module.set_entry_build_point(leaf.func);
		if (leaf.is_structured)
			structurizer.run_trivial();
		else
			structurizer.run();
		module.emit_leaf_function_body(leaf.func, structurizer);
	}

	if (!module.finalize_spirv(converter->spirv))
	{
		LOGE("Failed to finalize SPIR-V.\n");
		return DXIL_SPV_ERROR_GENERIC;
	}

	converter->compiled_entry_point = dxil_converter.get_compiled_entry_point();
	converter->uses_subgroup_size = module.has_builtin_shader_input(spv::BuiltInSubgroupSize);
	converter->is_multiview_compatible = dxil_converter.is_multiview_compatible();
	dxil_converter.get_workgroup_dimensions(converter->workgroup_size[0],
	                                        converter->workgroup_size[1],
	                                        converter->workgroup_size[2]);
	dxil_converter.get_compute_wave_size_range(converter->wave_size_min,
	                                           converter->wave_size_max,
	                                           converter->wave_size_preferred);
	converter->heuristic_min_wave_size = dxil_converter.get_compute_heuristic_min_wave_size();
	converter->heuristic_max_wave_size = dxil_converter.get_compute_heuristic_max_wave_size();
	converter->patch_vertex_count = dxil_converter.get_patch_vertex_count();
	converter->patch_location_offset = dxil_converter.get_patch_location_offset();

	for (int i = 0; i < int(ShaderFeature::Count); i++)
		converter->shader_feature_used[i] = dxil_converter.shader_requires_feature(ShaderFeature(i));

	converter->analysis_warnings = dxil_converter.get_analysis_warnings();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_converter_get_compiled_spirv(dxil_spv_converter converter, dxil_spv_compiled_spirv *compiled)
{
	if (converter->spirv.empty())
		return DXIL_SPV_ERROR_GENERIC;
	compiled->data = converter->spirv.data();
	compiled->size = converter->spirv.size() * sizeof(uint32_t);
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_converter_get_compiled_entry_point(dxil_spv_converter converter, const char **entry_point)
{
	if (converter->spirv.empty())
		return DXIL_SPV_ERROR_GENERIC;
	*entry_point = converter->compiled_entry_point.c_str();
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_converter_set_srv_remapper(dxil_spv_converter converter, dxil_spv_srv_remapper_cb remapper, void *userdata)
{
	converter->remapper.srv_remapper = remapper;
	converter->remapper.srv_userdata = userdata;
}

void dxil_spv_converter_set_sampler_remapper(dxil_spv_converter converter, dxil_spv_sampler_remapper_cb remapper, void *userdata)
{
	converter->remapper.sampler_remapper = remapper;
	converter->remapper.sampler_userdata = userdata;
}

void dxil_spv_converter_set_root_constant_word_count(dxil_spv_converter converter, unsigned num_words)
{
	converter->remapper.root_constant_word_count = num_words;
}

void dxil_spv_converter_set_root_descriptor_count(dxil_spv_converter converter, unsigned count)
{
	converter->remapper.root_descriptor_count = count;
}

void dxil_spv_converter_set_uav_remapper(dxil_spv_converter converter, dxil_spv_uav_remapper_cb remapper, void *userdata)
{
	converter->remapper.uav_remapper = remapper;
	converter->remapper.uav_userdata = userdata;
}

void dxil_spv_converter_set_cbv_remapper(dxil_spv_converter converter, dxil_spv_cbv_remapper_cb remapper, void *userdata)
{
	converter->remapper.cbv_remapper = remapper;
	converter->remapper.cbv_userdata = userdata;
}

void dxil_spv_converter_set_stage_input_remapper(dxil_spv_converter converter, dxil_spv_shader_stage_io_remapper_cb remapper, void *userdata)
{
	converter->remapper.stage_input_remapper = remapper;
	converter->remapper.stage_input_userdata = userdata;
}

void dxil_spv_converter_set_stage_output_remapper(dxil_spv_converter converter, dxil_spv_shader_stage_io_remapper_cb remapper, void *userdata)
{
	converter->remapper.stage_output_remapper = remapper;
	converter->remapper.stage_output_userdata = userdata;
}

void dxil_spv_converter_set_vertex_input_remapper(dxil_spv_converter converter, dxil_spv_vertex_input_remapper_cb remapper, void *userdata)
{
	converter->remapper.input_remapper = remapper;
	converter->remapper.input_userdata = userdata;
}

void dxil_spv_converter_set_stream_output_remapper(dxil_spv_converter converter, dxil_spv_stream_output_remapper_cb remapper, void *userdata)
{
	converter->remapper.output_remapper = remapper;
	converter->remapper.output_userdata = userdata;
}
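// Sketch of wiring remappers and root constants before running the converter
// (illustrative; my_srv_remapper and my_state are hypothetical client code,
// and 16 is an arbitrary example word count):
#if 0
dxil_spv_converter_set_srv_remapper(conv, my_srv_remapper, &my_state);
dxil_spv_converter_set_root_constant_word_count(conv, 16);
dxil_spv_converter_run(conv);
#endif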
/* Useful to check if the implementation recognizes a particular capability for ABI compatibility. */
dxil_spv_bool dxil_spv_converter_supports_option(dxil_spv_option cap)
{
	return Converter::recognizes_option(static_cast